From 7a42647f70373de25437cbab59fc2110ec105634 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 16 Jun 2022 02:03:12 +0200 Subject: [PATCH 0001/1056] random: schedule mix_interrupt_randomness() less often commit 534d2eaf1970274150596fdd2bf552721e65d6b2 upstream. It used to be that mix_interrupt_randomness() would credit 1 bit each time it ran, and so add_interrupt_randomness() would schedule mix() to run every 64 interrupts, a fairly arbitrary number, but nonetheless considered to be a decent enough conservative estimate. Since e3e33fc2ea7f ("random: do not use input pool from hard IRQs"), mix() is now able to credit multiple bits, depending on the number of calls to add(). This was done for reasons separate from this commit, but it has the nice side effect of enabling this patch to schedule mix() less often. Currently the rules are: a) Credit 1 bit for every 64 calls to add(). b) Schedule mix() once a second that add() is called. c) Schedule mix() once every 64 calls to add(). Rules (a) and (c) no longer need to be coupled. It's still important to have _some_ value in (c), so that we don't "over-saturate" the fast pool, but the once per second we get from rule (b) is a plenty enough baseline. So, by increasing the 64 in rule (c) to something larger, we avoid calling queue_work_on() as frequently during irq storms. This commit changes that 64 in rule (c) to be 1024, which means we schedule mix() 16 times less often. And it does *not* need to change the 64 in rule (a). Fixes: 58340f8e952b ("random: defer fast pool mixing to worker") Cc: stable@vger.kernel.org Cc: Dominik Brodowski Acked-by: Sebastian Andrzej Siewior Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 7f6585c49380bc..bdb4c9508872f1 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1000,7 +1000,7 @@ void add_interrupt_randomness(int irq) if (new_count & MIX_INFLIGHT) return; - if (new_count < 64 && !time_is_before_jiffies(fast_pool->last + HZ)) + if (new_count < 1024 && !time_is_before_jiffies(fast_pool->last + HZ)) return; if (unlikely(!fast_pool->mix.func)) From eb4c99d089c0bffbbcd943688a1480972e70c36f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 16 Jun 2022 15:00:51 +0200 Subject: [PATCH 0002/1056] random: quiet urandom warning ratelimit suppression message commit c01d4d0a82b71857be7449380338bc53dde2da92 upstream. random.c ratelimits how much it warns about uninitialized urandom reads using __ratelimit(). When the RNG is finally initialized, it prints the number of missed messages due to ratelimiting. It has been this way since that functionality was introduced back in 2018. Recently, cc1e127bfa95 ("random: remove ratelimiting for in-kernel unseeded randomness") put a bit more stress on the urandom ratelimiting, which teased out a bug in the implementation. Specifically, when under pressure, __ratelimit() will print its own message and reset the count back to 0, making the final message at the end less useful. Secondly, it does so as a pr_warn(), which apparently is undesirable for people's CI. Fortunately, __ratelimit() has the RATELIMIT_MSG_ON_RELEASE flag exactly for this purpose, so we set the flag. Fixes: 4e00b339e264 ("random: rate limit unseeded randomness warnings") Cc: stable@vger.kernel.org Reported-by: Jon Hunter Reported-by: Ron Economos Tested-by: Ron Economos Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 2 +- include/linux/ratelimit_types.h | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index bdb4c9508872f1..d6f2c62020b5de 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -88,7 +88,7 @@ static RAW_NOTIFIER_HEAD(random_ready_chain); /* Control how we warn userspace. */ static struct ratelimit_state urandom_warning = - RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3); + RATELIMIT_STATE_INIT_FLAGS("urandom_warning", HZ, 3, RATELIMIT_MSG_ON_RELEASE); static int ratelimit_disable __read_mostly = IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM); module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h index b676aa419eef87..f0e535f199bef5 100644 --- a/include/linux/ratelimit_types.h +++ b/include/linux/ratelimit_types.h @@ -23,12 +23,16 @@ struct ratelimit_state { unsigned long flags; }; -#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \ - .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ - .interval = interval_init, \ - .burst = burst_init, \ +#define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .interval = interval_init, \ + .burst = burst_init, \ + .flags = flags_init, \ } +#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) \ + RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, 0) + #define RATELIMIT_STATE_INIT_DISABLED \ RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST) From 535abf6207ef6d3d83367a97f9e0e52f94e40653 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 20 Jun 2022 12:40:08 +0200 Subject: [PATCH 0003/1056] ALSA: hda/via: Fix missing beep setup commit c7807b27d510e5aa53c8a120cfc02c33c24ebb5f upstream. Like the previous fix for Conexant codec, the beep_nid has to be set up before calling snd_hda_gen_parse_auto_config(); otherwise it'd miss the path setup. Fix the call order for addressing the missing beep setup. Fixes: 0e8f9862493a ("ALSA: hda/via - Simplify control management") Cc: Link: https://bugzilla.kernel.org/show_bug.cgi?id=216152 Link: https://lore.kernel.org/r/20220620104008.1994-2-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_via.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index 773a136161f11a..a188901a83bbe7 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -520,11 +520,11 @@ static int via_parse_auto_config(struct hda_codec *codec) if (err < 0) return err; - err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg); + err = auto_parse_beep(codec); if (err < 0) return err; - err = auto_parse_beep(codec); + err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg); if (err < 0) return err; From d12a5d1b5053548fdfdcb8e9b45b03a68408bb86 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 20 Jun 2022 12:40:07 +0200 Subject: [PATCH 0004/1056] ALSA: hda/conexant: Fix missing beep setup commit 5faa0bc69102f3a4c605581564c367be5eb94dfa upstream. Currently the Conexant codec driver sets up the beep NID after calling snd_hda_gen_parse_auto_config(). It turned out that this results in the insufficient setup for the beep control, as the generic parser handles the fake path in snd_hda_gen_parse_auto_config() only if the beep_nid is set up beforehand. For dealing with the beep widget properly, call cx_auto_parse_beep() before snd_hda_gen_parse_auto_config() call. Fixes: 51e19ca5f755 ("ALSA: hda/conexant - Clean up beep code") Cc: Link: https://bugzilla.kernel.org/show_bug.cgi?id=216152 Link: https://lore.kernel.org/r/20220620104008.1994-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index bce2cef80000b4..0b7d500249f6e6 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -1079,11 +1079,11 @@ static int patch_conexant_auto(struct hda_codec *codec) if (err < 0) goto error; - err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg); + err = cx_auto_parse_beep(codec); if (err < 0) goto error; - err = cx_auto_parse_beep(codec); + err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg); if (err < 0) goto error; From 82e29e99f2ab14f769fb352d5ef66730d7d5a0ec Mon Sep 17 00:00:00 2001 From: Soham Sen Date: Thu, 9 Jun 2022 23:49:20 +0530 Subject: [PATCH 0005/1056] ALSA: hda/realtek: Add mute LED quirk for HP Omen laptop commit b2e6b3d9bbb0a59ba7c710cc06e44cc548301f5f upstream. The HP Omen 15 laptop needs a quirk to toggle the mute LED. It already is implemented for a different variant of the HP Omen laptop so a fixup entry is needed for this variant. Signed-off-by: Soham Sen Cc: Link: https://lore.kernel.org/r/20220609181919.45535-1-contact@sohamsen.me Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4903a857f8f643..45abd677682cde 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8830,6 +8830,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8783, "HP ZBook Fury 15 G7 Mobile Workstation", ALC285_FIXUP_HP_GPIO_AMP_INIT), + SND_PCI_QUIRK(0x103c, 0x8787, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), From c478ceb4342b865180661ade7723c02849d96769 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 13 Jun 2022 14:57:19 +0800 Subject: [PATCH 0006/1056] ALSA: hda/realtek - ALC897 headset MIC no sound commit fe6900bd8156467365bd5b976df64928fdebfeb0 upstream. There is not have Headset Mic verb table in BIOS default. So, it will have recording issue from headset MIC. Add the verb table value without jack detect. It will turn on Headset Mic. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/719133a27d8844a890002cb817001dfa@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 45abd677682cde..9339042a0cd769 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10522,6 +10522,7 @@ enum { ALC668_FIXUP_MIC_DET_COEF, ALC897_FIXUP_LENOVO_HEADSET_MIC, ALC897_FIXUP_HEADSET_MIC_PIN, + ALC897_FIXUP_HP_HSMIC_VERB, }; static const struct hda_fixup alc662_fixups[] = { @@ -10941,6 +10942,13 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC897_FIXUP_LENOVO_HEADSET_MIC }, + [ALC897_FIXUP_HP_HSMIC_VERB] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -10966,6 +10974,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), + SND_PCI_QUIRK(0x103c, 0x8719, "HP", ALC897_FIXUP_HP_HSMIC_VERB), SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x103c, 0x885f, "HP 288 Pro G8", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE), From dffaf580c3c2dfef716ec72bcaba2c440c14c922 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 14 Jun 2022 07:48:31 +0200 Subject: [PATCH 0007/1056] ALSA: hda/realtek: Apply fixup for Lenovo Yoga Duet 7 properly commit 56ec3e755bd1041d35bdec020a99b327697ee470 upstream. It turned out that Lenovo shipped two completely different products with the very same PCI SSID, where both require different quirks; namely, Lenovo C940 has already the fixup for its speaker (ALC298_FIXUP_LENOVO_SPK_VOLUME) with the PCI SSID 17aa:3818, while Yoga Duet 7 has also the very same PCI SSID but requires a different quirk, ALC287_FIXUP_YOGA7_14TIL_SPEAKERS. Fortunately, both are with different codecs (C940 with ALC298 and Duet 7 with ALC287), hence we can apply different fixes by checking the codec ID. This patch implements that special fixup function. For easier handling, the internal function for applying a specific fixup entry is exported as __snd_hda_apply_fixup(), so that it can be called from the codec driver. The rest is simply calling it with a different fixup ID depending on the codec ID. Reported-by: Hans de Goede Tested-by: nikitashvets@flyium.com Cc: Link: https://lore.kernel.org/r/5ca147d1-3a2d-60c6-c491-8aa844183222@redhat.com Link: https://lore.kernel.org/r/20220614054831.14648-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_auto_parser.c | 7 ++++--- sound/pci/hda/hda_local.h | 1 + sound/pci/hda/patch_realtek.c | 24 +++++++++++++++++++++++- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index 500d0d474d27b5..9cd0d61ab26d55 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -823,7 +823,7 @@ static void set_pin_targets(struct hda_codec *codec, snd_hda_set_pin_ctl_cache(codec, cfg->nid, cfg->val); } -static void apply_fixup(struct hda_codec *codec, int id, int action, int depth) +void __snd_hda_apply_fixup(struct hda_codec *codec, int id, int action, int depth) { const char *modelname = codec->fixup_name; @@ -833,7 +833,7 @@ static void apply_fixup(struct hda_codec *codec, int id, int action, int depth) if (++depth > 10) break; if (fix->chained_before) - apply_fixup(codec, fix->chain_id, action, depth + 1); + __snd_hda_apply_fixup(codec, fix->chain_id, action, depth + 1); switch (fix->type) { case HDA_FIXUP_PINS: @@ -874,6 +874,7 @@ static void apply_fixup(struct hda_codec *codec, int id, int action, int depth) id = fix->chain_id; } } +EXPORT_SYMBOL_GPL(__snd_hda_apply_fixup); /** * snd_hda_apply_fixup - Apply the fixup chain with the given action @@ -883,7 +884,7 @@ static void apply_fixup(struct hda_codec *codec, int id, int action, int depth) void snd_hda_apply_fixup(struct hda_codec *codec, int action) { if (codec->fixup_list) - apply_fixup(codec, codec->fixup_id, action, 0); + __snd_hda_apply_fixup(codec, codec->fixup_id, action, 0); } EXPORT_SYMBOL_GPL(snd_hda_apply_fixup); diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h index 8621f576446b85..63c00363acad9d 100644 --- a/sound/pci/hda/hda_local.h +++ b/sound/pci/hda/hda_local.h @@ -350,6 +350,7 @@ void snd_hda_apply_verbs(struct hda_codec *codec); void snd_hda_apply_pincfgs(struct hda_codec *codec, const struct hda_pintbl *cfg); void snd_hda_apply_fixup(struct hda_codec *codec, int action); +void __snd_hda_apply_fixup(struct hda_codec *codec, int id, int action, int depth); void snd_hda_pick_fixup(struct hda_codec *codec, const struct hda_model_fixup *models, const struct snd_pci_quirk *quirk, diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9339042a0cd769..04b3a1f9ceef2c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6883,6 +6883,7 @@ enum { ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS, ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE, ALC287_FIXUP_YOGA7_14ITL_SPEAKERS, + ALC298_FIXUP_LENOVO_C940_DUET7, ALC287_FIXUP_13S_GEN2_SPEAKERS, ALC256_FIXUP_SET_COEF_DEFAULTS, ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE, @@ -6892,6 +6893,23 @@ enum { ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE, }; +/* A special fixup for Lenovo C940 and Yoga Duet 7; + * both have the very same PCI SSID, and we need to apply different fixups + * depending on the codec ID + */ +static void alc298_fixup_lenovo_c940_duet7(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + int id; + + if (codec->core.vendor_id == 0x10ec0298) + id = ALC298_FIXUP_LENOVO_SPK_VOLUME; /* C940 */ + else + id = ALC287_FIXUP_YOGA7_14ITL_SPEAKERS; /* Duet 7 */ + __snd_hda_apply_fixup(codec, id, action, 0); +} + static const struct hda_fixup alc269_fixups[] = { [ALC269_FIXUP_GPIO2] = { .type = HDA_FIXUP_FUNC, @@ -8591,6 +8609,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE, }, + [ALC298_FIXUP_LENOVO_C940_DUET7] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc298_fixup_lenovo_c940_duet7, + }, [ALC287_FIXUP_13S_GEN2_SPEAKERS] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -9060,7 +9082,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340), SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), - SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME), + SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7), SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3820, "Yoga Duet 7 13ITL6", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), From 48e3b93cda56da9af65efb065aa6888132986a36 Mon Sep 17 00:00:00 2001 From: Tim Crawford Date: Fri, 17 Jun 2022 07:30:28 -0600 Subject: [PATCH 0008/1056] ALSA: hda/realtek: Add quirk for Clevo PD70PNT commit d49951219b0249d3eff49e4f02e0de82357bc8a0 upstream. Fixes speaker output and headset detection on Clevo PD70PNT. Signed-off-by: Tim Crawford Cc: Link: https://lore.kernel.org/r/20220617133028.50568-1-tcrawford@system76.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 04b3a1f9ceef2c..5639356c02940d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2629,6 +2629,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67f1, "Clevo PC70H[PRS]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67f5, "Clevo PD70PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x70d1, "Clevo PC70[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170SM", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x7715, "Clevo X170KM-G", ALC1220_FIXUP_CLEVO_PB51ED), From 56c6f1fcd594b66a8f61e55e8df7b7197062ef4e Mon Sep 17 00:00:00 2001 From: Tim Crawford Date: Wed, 22 Jun 2022 09:00:17 -0600 Subject: [PATCH 0009/1056] ALSA: hda/realtek: Add quirk for Clevo NS50PU commit 627ce0d68eb4b53e995b08089fa9da1e513ec5ba upstream. Fixes headset detection on Clevo NS50PU. Signed-off-by: Tim Crawford Cc: Link: https://lore.kernel.org/r/20220622150017.9897-1-tcrawford@system76.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5639356c02940d..70664c9832d6fa 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9000,6 +9000,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x70f3, "Clevo NH77DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70f4, "Clevo NH77EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70f6, "Clevo NH77DPQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x7716, "Clevo NS50PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8521, "Clevo NH77D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), From cbf585269510e38ec57a8bf38cea4ec2fd0cc1b5 Mon Sep 17 00:00:00 2001 From: Rosemarie O'Riorden Date: Tue, 21 Jun 2022 16:48:45 -0400 Subject: [PATCH 0010/1056] net: openvswitch: fix parsing of nw_proto for IPv6 fragments commit 12378a5a75e33f34f8586706eb61cca9e6d4690c upstream. When a packet enters the OVS datapath and does not match any existing flows installed in the kernel flow cache, the packet will be sent to userspace to be parsed, and a new flow will be created. The kernel and OVS rely on each other to parse packet fields in the same way so that packets will be handled properly. As per the design document linked below, OVS expects all later IPv6 fragments to have nw_proto=44 in the flow key, so they can be correctly matched on OpenFlow rules. OpenFlow controllers create pipelines based on this design. This behavior was changed by the commit in the Fixes tag so that nw_proto equals the next_header field of the last extension header. However, there is no counterpart for this change in OVS userspace, meaning that this field is parsed differently between OVS and the kernel. This is a problem because OVS creates actions based on what is parsed in userspace, but the kernel-provided flow key is used as a match criteria, as described in Documentation/networking/openvswitch.rst. This leads to issues such as packets incorrectly matching on a flow and thus the wrong list of actions being applied to the packet. Such changes in packet parsing cannot be implemented without breaking the userspace. The offending commit is partially reverted to restore the expected behavior. The change technically made sense and there is a good reason that it was implemented, but it does not comply with the original design of OVS. If in the future someone wants to implement such a change, then it must be user-configurable and disabled by default to preserve backwards compatibility with existing OVS versions. Cc: stable@vger.kernel.org Fixes: fa642f08839b ("openvswitch: Derive IP protocol number for IPv6 later frags") Link: https://docs.openvswitch.org/en/latest/topics/design/#fragments Signed-off-by: Rosemarie O'Riorden Acked-by: Eelco Chaudron Link: https://lore.kernel.org/r/20220621204845.9721-1-roriorden@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 02096f2ec67846..1b81d71bac3cf9 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -266,7 +266,7 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) if (flags & IP6_FH_F_FRAG) { if (frag_off) { key->ip.frag = OVS_FRAG_TYPE_LATER; - key->ip.proto = nexthdr; + key->ip.proto = NEXTHDR_FRAGMENT; return 0; } key->ip.frag = OVS_FRAG_TYPE_FIRST; From 10629c04b3a8315f32f78331aad15d04f1d93db0 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Thu, 26 May 2022 18:59:59 -0500 Subject: [PATCH 0011/1056] 9p: Fix refcounting during full path walks for fid lookups commit 2a3dcbccd64ba35c045fac92272ff981c4cbef44 upstream. Decrement the refcount of the parent dentry's fid after walking each path component during a full path walk for a lookup. Failure to do so can lead to fids that are not clunked until the filesystem is unmounted, as indicated by this warning: 9pnet: found fid 3 not clunked The improper refcounting after walking resulted in open(2) returning -EIO on any directories underneath the mount point when using the virtio transport. When using the fd transport, there's no apparent issue until the filesytem is unmounted and the warning above is emitted to the logs. In some cases, the user may not yet be attached to the filesystem and a new root fid, associated with the user, is created and attached to the root dentry before the full path walk is performed. Increment the new root fid's refcount to two in that situation so that it can be safely decremented to one after it is used for the walk operation. The new fid will still be attached to the root dentry when v9fs_fid_lookup_with_uid() returns so a final refcount of one is correct/expected. Link: https://lkml.kernel.org/r/20220527000003.355812-2-tyhicks@linux.microsoft.com Link: https://lkml.kernel.org/r/20220612085330.1451496-4-asmadeus@codewreck.org Fixes: 6636b6dcc3db ("9p: add refcount to p9_fid struct") Cc: stable@vger.kernel.org Signed-off-by: Tyler Hicks Reviewed-by: Christian Schoenebeck [Dominique: fix clunking fid multiple times discussed in second link] Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- fs/9p/fid.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/fs/9p/fid.c b/fs/9p/fid.c index e8a3b891b03668..c702a336837dde 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -151,7 +151,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, const unsigned char **wnames, *uname; int i, n, l, clone, access; struct v9fs_session_info *v9ses; - struct p9_fid *fid, *old_fid = NULL; + struct p9_fid *fid, *old_fid; v9ses = v9fs_dentry2v9ses(dentry); access = v9ses->flags & V9FS_ACCESS_MASK; @@ -193,13 +193,12 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, if (IS_ERR(fid)) return fid; + refcount_inc(&fid->count); v9fs_fid_add(dentry->d_sb->s_root, fid); } /* If we are root ourself just return that */ - if (dentry->d_sb->s_root == dentry) { - refcount_inc(&fid->count); + if (dentry->d_sb->s_root == dentry) return fid; - } /* * Do a multipath walk with attached root. * When walking parent we need to make sure we @@ -211,6 +210,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, fid = ERR_PTR(n); goto err_out; } + old_fid = fid; clone = 1; i = 0; while (i < n) { @@ -220,19 +220,15 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, * walk to ensure none of the patch component change */ fid = p9_client_walk(fid, l, &wnames[i], clone); + /* non-cloning walk will return the same fid */ + if (fid != old_fid) { + p9_client_clunk(old_fid); + old_fid = fid; + } if (IS_ERR(fid)) { - if (old_fid) { - /* - * If we fail, clunk fid which are mapping - * to path component and not the last component - * of the path. - */ - p9_client_clunk(old_fid); - } kfree(wnames); goto err_out; } - old_fid = fid; i += l; clone = 0; } From 22832ac3eb5be3f7168816a76b64c1284e12eb3c Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Sun, 12 Jun 2022 16:00:05 +0900 Subject: [PATCH 0012/1056] 9p: fix fid refcount leak in v9fs_vfs_atomic_open_dotl commit beca774fc51a9ba8abbc869cf0c3d965ff17cd24 upstream. We need to release directory fid if we fail halfway through open This fixes fid leaking with xfstests generic 531 Link: https://lkml.kernel.org/r/20220612085330.1451496-2-asmadeus@codewreck.org Fixes: 6636b6dcc3db ("9p: add refcount to p9_fid struct") Cc: stable@vger.kernel.org Reported-by: Tyler Hicks Reviewed-by: Tyler Hicks Reviewed-by: Christian Schoenebeck Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- fs/9p/vfs_inode_dotl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index a61df2e0ae5213..833c638437a728 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -276,6 +276,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (IS_ERR(ofid)) { err = PTR_ERR(ofid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); + p9_client_clunk(dfid); goto out; } @@ -287,6 +288,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (err) { p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n", err); + p9_client_clunk(dfid); goto error; } err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags), @@ -294,6 +296,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n", err); + p9_client_clunk(dfid); goto error; } v9fs_invalidate_inode_attr(dir); From f0126bcaee81dabc1926012126aa74caa03a4c6e Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Sun, 12 Jun 2022 17:14:55 +0900 Subject: [PATCH 0013/1056] 9p: fix fid refcount leak in v9fs_vfs_get_link commit e5690f263208c5abce7451370b7786eb25b405eb upstream. we check for protocol version later than required, after a fid has been obtained. Just move the version check earlier. Link: https://lkml.kernel.org/r/20220612085330.1451496-3-asmadeus@codewreck.org Fixes: 6636b6dcc3db ("9p: add refcount to p9_fid struct") Cc: stable@vger.kernel.org Reviewed-by: Tyler Hicks Reviewed-by: Christian Schoenebeck Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- fs/9p/vfs_inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 08f48b70a74145..15d9492536cf59 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1228,15 +1228,15 @@ static const char *v9fs_vfs_get_link(struct dentry *dentry, return ERR_PTR(-ECHILD); v9ses = v9fs_dentry2v9ses(dentry); - fid = v9fs_fid_lookup(dentry); + if (!v9fs_proto_dotu(v9ses)) + return ERR_PTR(-EBADF); + p9_debug(P9_DEBUG_VFS, "%pd\n", dentry); + fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return ERR_CAST(fid); - if (!v9fs_proto_dotu(v9ses)) - return ERR_PTR(-EBADF); - st = p9_client_stat(fid); p9_client_clunk(fid); if (IS_ERR(st)) From 341d33128a940c6634175dcb6ca92dc454cfa7d2 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 18 May 2022 10:41:48 +0100 Subject: [PATCH 0014/1056] btrfs: fix hang during unmount when block group reclaim task is running commit 31e70e527806c546a72262f2fc3d982ee23c42d3 upstream. When we start an unmount, at close_ctree(), if we have the reclaim task running and in the middle of a data block group relocation, we can trigger a deadlock when stopping an async reclaim task, producing a trace like the following: [629724.498185] task:kworker/u16:7 state:D stack: 0 pid:681170 ppid: 2 flags:0x00004000 [629724.499760] Workqueue: events_unbound btrfs_async_reclaim_metadata_space [btrfs] [629724.501267] Call Trace: [629724.501759] [629724.502174] __schedule+0x3cb/0xed0 [629724.502842] schedule+0x4e/0xb0 [629724.503447] btrfs_wait_on_delayed_iputs+0x7c/0xc0 [btrfs] [629724.504534] ? prepare_to_wait_exclusive+0xc0/0xc0 [629724.505442] flush_space+0x423/0x630 [btrfs] [629724.506296] ? rcu_read_unlock_trace_special+0x20/0x50 [629724.507259] ? lock_release+0x220/0x4a0 [629724.507932] ? btrfs_get_alloc_profile+0xb3/0x290 [btrfs] [629724.508940] ? do_raw_spin_unlock+0x4b/0xa0 [629724.509688] btrfs_async_reclaim_metadata_space+0x139/0x320 [btrfs] [629724.510922] process_one_work+0x252/0x5a0 [629724.511694] ? process_one_work+0x5a0/0x5a0 [629724.512508] worker_thread+0x52/0x3b0 [629724.513220] ? process_one_work+0x5a0/0x5a0 [629724.514021] kthread+0xf2/0x120 [629724.514627] ? kthread_complete_and_exit+0x20/0x20 [629724.515526] ret_from_fork+0x22/0x30 [629724.516236] [629724.516694] task:umount state:D stack: 0 pid:719055 ppid:695412 flags:0x00004000 [629724.518269] Call Trace: [629724.518746] [629724.519160] __schedule+0x3cb/0xed0 [629724.519835] schedule+0x4e/0xb0 [629724.520467] schedule_timeout+0xed/0x130 [629724.521221] ? lock_release+0x220/0x4a0 [629724.521946] ? lock_acquired+0x19c/0x420 [629724.522662] ? trace_hardirqs_on+0x1b/0xe0 [629724.523411] __wait_for_common+0xaf/0x1f0 [629724.524189] ? usleep_range_state+0xb0/0xb0 [629724.524997] __flush_work+0x26d/0x530 [629724.525698] ? flush_workqueue_prep_pwqs+0x140/0x140 [629724.526580] ? lock_acquire+0x1a0/0x310 [629724.527324] __cancel_work_timer+0x137/0x1c0 [629724.528190] close_ctree+0xfd/0x531 [btrfs] [629724.529000] ? evict_inodes+0x166/0x1c0 [629724.529510] generic_shutdown_super+0x74/0x120 [629724.530103] kill_anon_super+0x14/0x30 [629724.530611] btrfs_kill_super+0x12/0x20 [btrfs] [629724.531246] deactivate_locked_super+0x31/0xa0 [629724.531817] cleanup_mnt+0x147/0x1c0 [629724.532319] task_work_run+0x5c/0xa0 [629724.532984] exit_to_user_mode_prepare+0x1a6/0x1b0 [629724.533598] syscall_exit_to_user_mode+0x16/0x40 [629724.534200] do_syscall_64+0x48/0x90 [629724.534667] entry_SYSCALL_64_after_hwframe+0x44/0xae [629724.535318] RIP: 0033:0x7fa2b90437a7 [629724.535804] RSP: 002b:00007ffe0b7e4458 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [629724.536912] RAX: 0000000000000000 RBX: 00007fa2b9182264 RCX: 00007fa2b90437a7 [629724.538156] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000555d6cf20dd0 [629724.539053] RBP: 0000555d6cf20ba0 R08: 0000000000000000 R09: 00007ffe0b7e3200 [629724.539956] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [629724.540883] R13: 0000555d6cf20dd0 R14: 0000555d6cf20cb0 R15: 0000000000000000 [629724.541796] This happens because: 1) Before entering close_ctree() we have the async block group reclaim task running and relocating a data block group; 2) There's an async metadata (or data) space reclaim task running; 3) We enter close_ctree() and park the cleaner kthread; 4) The async space reclaim task is at flush_space() and runs all the existing delayed iputs; 5) Before the async space reclaim task calls btrfs_wait_on_delayed_iputs(), the block group reclaim task which is doing the data block group relocation, creates a delayed iput at replace_file_extents() (called when COWing leaves that have file extent items pointing to relocated data extents, during the merging phase of relocation roots); 6) The async reclaim space reclaim task blocks at btrfs_wait_on_delayed_iputs(), since we have a new delayed iput; 7) The task at close_ctree() then calls cancel_work_sync() to stop the async space reclaim task, but it blocks since that task is waiting for the delayed iput to be run; 8) The delayed iput is never run because the cleaner kthread is parked, and no one else runs delayed iputs, resulting in a hang. So fix this by stopping the async block group reclaim task before we park the cleaner kthread. Fixes: 18bb8bbf13c183 ("btrfs: zoned: automatically reclaim zones") CC: stable@vger.kernel.org # 5.15+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9ab2792055271a..233d894f6febaf 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4360,6 +4360,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) int ret; set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags); + + /* + * We may have the reclaim task running and relocating a data block group, + * in which case it may create delayed iputs. So stop it before we park + * the cleaner kthread otherwise we can get new delayed iputs after + * parking the cleaner, and that can make the async reclaim task to hang + * if it's waiting for delayed iputs to complete, since the cleaner is + * parked and can not run delayed iputs - this will make us hang when + * trying to stop the async reclaim task. + */ + cancel_work_sync(&fs_info->reclaim_bgs_work); /* * We don't want the cleaner to start new transactions, add more delayed * iputs, etc. while we're closing. We can't use kthread_stop() yet @@ -4400,8 +4411,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) cancel_work_sync(&fs_info->async_data_reclaim_work); cancel_work_sync(&fs_info->preempt_reclaim_work); - cancel_work_sync(&fs_info->reclaim_bgs_work); - /* Cancel or finish ongoing discard work */ btrfs_discard_cleanup(fs_info); From 82e3769c0257360319ea1cb3c6fdaaed7a8b70e3 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 18 May 2022 13:03:09 +0800 Subject: [PATCH 0015/1056] btrfs: prevent remounting to v1 space cache for subpage mount commit 0591f04036218d572d54349ea8c7914ad9c82b2b upstream. Upstream commit 9f73f1aef98b ("btrfs: force v2 space cache usage for subpage mount") forces subpage mount to use v2 cache, to avoid deprecated v1 cache which doesn't support subpage properly. But there is a loophole that user can still remount to v1 cache. The existing check will only give users a warning, but does not really prevent to do the remount. Although remounting to v1 will not cause any problems since the v1 cache will always be marked invalid when mounted with a different page size, it's still better to prevent v1 cache at all for subpage mounts. Fixes: 9f73f1aef98b ("btrfs: force v2 space cache usage for subpage mount") CC: stable@vger.kernel.org # 5.15+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/super.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7f91d62c2225a7..78b78d38f880a9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1917,6 +1917,14 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (ret) goto restore; + /* V1 cache is not supported for subpage mount. */ + if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) { + btrfs_warn(fs_info, + "v1 space cache is not supported for page size %lu with sectorsize %u", + PAGE_SIZE, fs_info->sectorsize); + ret = -EINVAL; + goto restore; + } btrfs_remount_begin(fs_info, old_opts, *flags); btrfs_resize_thread_pool(fs_info, fs_info->thread_pool_size, old_thread_pool_size); From 4a19c1cee0de6eea55f8ff4daad14ab33bff6fb3 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 2 Jun 2022 23:57:17 +0200 Subject: [PATCH 0016/1056] btrfs: add error messages to all unrecognized mount options commit e3a4167c880cf889f66887a152799df4d609dd21 upstream. Almost none of the errors stemming from a valid mount option but wrong value prints a descriptive message which would help to identify why mount failed. Like in the linked report: $ uname -r v4.19 $ mount -o compress=zstd /dev/sdb /mnt mount: /mnt: wrong fs type, bad option, bad superblock on /dev/sdb, missing codepage or helper program, or other error. $ dmesg ... BTRFS error (device sdb): open_ctree failed Errors caused by memory allocation failures are left out as it's not a user error so reporting that would be confusing. Link: https://lore.kernel.org/linux-btrfs/9c3fec36-fc61-3a33-4977-a7e207c3fa4e@gmx.de/ CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Qu Wenruo Reviewed-by: Nikolay Borisov Reviewed-by: Anand Jain Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/super.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 78b78d38f880a9..969bf0724fdfe6 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -712,6 +712,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, compress_force = false; no_compress++; } else { + btrfs_err(info, "unrecognized compression value %s", + args[0].from); ret = -EINVAL; goto out; } @@ -770,8 +772,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_thread_pool: ret = match_int(&args[0], &intarg); if (ret) { + btrfs_err(info, "unrecognized thread_pool value %s", + args[0].from); goto out; } else if (intarg == 0) { + btrfs_err(info, "invalid value 0 for thread_pool"); ret = -EINVAL; goto out; } @@ -832,8 +837,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, break; case Opt_ratio: ret = match_int(&args[0], &intarg); - if (ret) + if (ret) { + btrfs_err(info, "unrecognized metadata_ratio value %s", + args[0].from); goto out; + } info->metadata_ratio = intarg; btrfs_info(info, "metadata ratio %u", info->metadata_ratio); @@ -850,6 +858,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, btrfs_set_and_info(info, DISCARD_ASYNC, "turning on async discard"); } else { + btrfs_err(info, "unrecognized discard mode value %s", + args[0].from); ret = -EINVAL; goto out; } @@ -874,6 +884,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, btrfs_set_and_info(info, FREE_SPACE_TREE, "enabling free space tree"); } else { + btrfs_err(info, "unrecognized space_cache value %s", + args[0].from); ret = -EINVAL; goto out; } @@ -943,8 +955,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, break; case Opt_check_integrity_print_mask: ret = match_int(&args[0], &intarg); - if (ret) + if (ret) { + btrfs_err(info, + "unrecognized check_integrity_print_mask value %s", + args[0].from); goto out; + } info->check_integrity_print_mask = intarg; btrfs_info(info, "check_integrity_print_mask 0x%x", info->check_integrity_print_mask); @@ -959,13 +975,15 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, goto out; #endif case Opt_fatal_errors: - if (strcmp(args[0].from, "panic") == 0) + if (strcmp(args[0].from, "panic") == 0) { btrfs_set_opt(info->mount_opt, PANIC_ON_FATAL_ERROR); - else if (strcmp(args[0].from, "bug") == 0) + } else if (strcmp(args[0].from, "bug") == 0) { btrfs_clear_opt(info->mount_opt, PANIC_ON_FATAL_ERROR); - else { + } else { + btrfs_err(info, "unrecognized fatal_errors value %s", + args[0].from); ret = -EINVAL; goto out; } @@ -973,8 +991,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_commit_interval: intarg = 0; ret = match_int(&args[0], &intarg); - if (ret) + if (ret) { + btrfs_err(info, "unrecognized commit_interval value %s", + args[0].from); + ret = -EINVAL; goto out; + } if (intarg == 0) { btrfs_info(info, "using default commit interval %us", @@ -988,8 +1010,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, break; case Opt_rescue: ret = parse_rescue_options(info, args[0].from); - if (ret < 0) + if (ret < 0) { + btrfs_err(info, "unrecognized rescue value %s", + args[0].from); goto out; + } break; #ifdef CONFIG_BTRFS_DEBUG case Opt_fragment_all: From 8540f66196ca35b7b5e902932571c18b9fde0cd1 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Thu, 16 Jun 2022 12:11:25 -0700 Subject: [PATCH 0017/1056] scsi: ibmvfc: Store vhost pointer during subcrq allocation commit aeaadcde1a60138bceb65de3cdaeec78170b4459 upstream. Currently the back pointer from a queue to the vhost adapter isn't set until after subcrq interrupt registration. The value is available when a queue is first allocated and can/should be also set for primary and async queues as well as subcrqs. This fixes a crash observed during kexec/kdump on Power 9 with legacy XICS interrupt controller where a pending subcrq interrupt from the previous kernel can be replayed immediately upon IRQ registration resulting in dereference of a garbage backpointer in ibmvfc_interrupt_scsi(). Kernel attempted to read user page (58) - exploit attempt? (uid: 0) BUG: Kernel NULL pointer dereference on read at 0x00000058 Faulting instruction address: 0xc008000003216a08 Oops: Kernel access of bad area, sig: 11 [#1] ... NIP [c008000003216a08] ibmvfc_interrupt_scsi+0x40/0xb0 [ibmvfc] LR [c0000000082079e8] __handle_irq_event_percpu+0x98/0x270 Call Trace: [c000000047fa3d80] [c0000000123e6180] 0xc0000000123e6180 (unreliable) [c000000047fa3df0] [c0000000082079e8] __handle_irq_event_percpu+0x98/0x270 [c000000047fa3ea0] [c000000008207d18] handle_irq_event+0x98/0x188 [c000000047fa3ef0] [c00000000820f564] handle_fasteoi_irq+0xc4/0x310 [c000000047fa3f40] [c000000008205c60] generic_handle_irq+0x50/0x80 [c000000047fa3f60] [c000000008015c40] __do_irq+0x70/0x1a0 [c000000047fa3f90] [c000000008016d7c] __do_IRQ+0x9c/0x130 [c000000014622f60] [0000000020000000] 0x20000000 [c000000014622ff0] [c000000008016e50] do_IRQ+0x40/0xa0 [c000000014623020] [c000000008017044] replay_soft_interrupts+0x194/0x2f0 [c000000014623210] [c0000000080172a8] arch_local_irq_restore+0x108/0x170 [c000000014623240] [c000000008eb1008] _raw_spin_unlock_irqrestore+0x58/0xb0 [c000000014623270] [c00000000820b12c] __setup_irq+0x49c/0x9f0 [c000000014623310] [c00000000820b7c0] request_threaded_irq+0x140/0x230 [c000000014623380] [c008000003212a50] ibmvfc_register_scsi_channel+0x1e8/0x2f0 [ibmvfc] [c000000014623450] [c008000003213d1c] ibmvfc_init_sub_crqs+0xc4/0x1f0 [ibmvfc] [c0000000146234d0] [c0080000032145a8] ibmvfc_reset_crq+0x150/0x210 [ibmvfc] [c000000014623550] [c0080000032147c8] ibmvfc_init_crq+0x160/0x280 [ibmvfc] [c0000000146235f0] [c00800000321a9cc] ibmvfc_probe+0x2a4/0x530 [ibmvfc] Link: https://lore.kernel.org/r/20220616191126.1281259-2-tyreld@linux.ibm.com Fixes: 3034ebe26389 ("scsi: ibmvfc: Add alloc/dealloc routines for SCSI Sub-CRQ Channels") Cc: stable@vger.kernel.org Reviewed-by: Brian King Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi/ibmvfc.c | 3 ++- drivers/scsi/ibmvscsi/ibmvfc.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 01f79991bf4a28..8ce301b763ad1e 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -5680,6 +5680,8 @@ static int ibmvfc_alloc_queue(struct ibmvfc_host *vhost, queue->cur = 0; queue->fmt = fmt; queue->size = PAGE_SIZE / fmt_size; + + queue->vhost = vhost; return 0; } @@ -5788,7 +5790,6 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost, } scrq->hwq_id = index; - scrq->vhost = vhost; LEAVE; return 0; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index 3718406e098876..c39a245f43d025 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -789,6 +789,7 @@ struct ibmvfc_queue { spinlock_t _lock; spinlock_t *q_lock; + struct ibmvfc_host *vhost; struct ibmvfc_event_pool evt_pool; struct list_head sent; struct list_head free; @@ -797,7 +798,6 @@ struct ibmvfc_queue { union ibmvfc_iu cancel_rsp; /* Sub-CRQ fields */ - struct ibmvfc_host *vhost; unsigned long cookie; unsigned long vios_cookie; unsigned long hw_irq; From 161ec2a0807ddd58bc0f24f3e1e7e3d4fef5297f Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Thu, 16 Jun 2022 12:11:26 -0700 Subject: [PATCH 0018/1056] scsi: ibmvfc: Allocate/free queue resource only during probe/remove commit 72ea7fe0db73d65c7d977208842d8ade9b823de9 upstream. Currently, the sub-queues and event pool resources are allocated/freed for every CRQ connection event such as reset and LPM. This exposes the driver to a couple issues. First the inefficiency of freeing and reallocating memory that can simply be resued after being sanitized. Further, a system under memory pressue runs the risk of allocation failures that could result in a crippled driver. Finally, there is a race window where command submission/compeletion can try to pull/return elements from/to an event pool that is being deleted or already has been deleted due to the lack of host state around freeing/allocating resources. The following is an example of list corruption following a live partition migration (LPM): Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: vfat fat isofs cdrom ext4 mbcache jbd2 nft_counter nft_compat nf_tables nfnetlink rpadlpar_io rpaphp xsk_diag nfsv3 nfs_acl nfs lockd grace fscache netfs rfkill bonding tls sunrpc pseries_rng drm drm_panel_orientation_quirks xfs libcrc32c dm_service_time sd_mod t10_pi sg ibmvfc scsi_transport_fc ibmveth vmx_crypto dm_multipath dm_mirror dm_region_hash dm_log dm_mod ipmi_devintf ipmi_msghandler fuse CPU: 0 PID: 2108 Comm: ibmvfc_0 Kdump: loaded Not tainted 5.14.0-70.9.1.el9_0.ppc64le #1 NIP: c0000000007c4bb0 LR: c0000000007c4bac CTR: 00000000005b9a10 REGS: c00000025c10b760 TRAP: 0700 Not tainted (5.14.0-70.9.1.el9_0.ppc64le) MSR: 800000000282b033 CR: 2800028f XER: 0000000f CFAR: c0000000001f55bc IRQMASK: 0 GPR00: c0000000007c4bac c00000025c10ba00 c000000002a47c00 000000000000004e GPR04: c0000031e3006f88 c0000031e308bd00 c00000025c10b768 0000000000000027 GPR08: 0000000000000000 c0000031e3009dc0 00000031e0eb0000 0000000000000000 GPR12: c0000031e2ffffa8 c000000002dd0000 c000000000187108 c00000020fcee2c0 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 c008000002f81300 GPR24: 5deadbeef0000100 5deadbeef0000122 c000000263ba6910 c00000024cc88000 GPR28: 000000000000003c c0000002430a0000 c0000002430ac300 000000000000c300 NIP [c0000000007c4bb0] __list_del_entry_valid+0x90/0x100 LR [c0000000007c4bac] __list_del_entry_valid+0x8c/0x100 Call Trace: [c00000025c10ba00] [c0000000007c4bac] __list_del_entry_valid+0x8c/0x100 (unreliable) [c00000025c10ba60] [c008000002f42284] ibmvfc_free_queue+0xec/0x210 [ibmvfc] [c00000025c10bb10] [c008000002f4246c] ibmvfc_deregister_scsi_channel+0xc4/0x160 [ibmvfc] [c00000025c10bba0] [c008000002f42580] ibmvfc_release_sub_crqs+0x78/0x130 [ibmvfc] [c00000025c10bc20] [c008000002f4f6cc] ibmvfc_do_work+0x5c4/0xc70 [ibmvfc] [c00000025c10bce0] [c008000002f4fdec] ibmvfc_work+0x74/0x1e8 [ibmvfc] [c00000025c10bda0] [c0000000001872b8] kthread+0x1b8/0x1c0 [c00000025c10be10] [c00000000000cd64] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 40820034 38600001 38210060 4e800020 7c0802a6 7c641b78 3c62fe7a 7d254b78 3863b590 f8010070 4ba309cd 60000000 <0fe00000> 7c0802a6 3c62fe7a 3863b640 ---[ end trace 11a2b65a92f8b66c ]--- ibmvfc 30000003: Send warning. Receive queue closed, will retry. Add registration/deregistration helpers that are called instead during connection resets to sanitize and reconfigure the queues. Link: https://lore.kernel.org/r/20220616191126.1281259-3-tyreld@linux.ibm.com Fixes: 3034ebe26389 ("scsi: ibmvfc: Add alloc/dealloc routines for SCSI Sub-CRQ Channels") Cc: stable@vger.kernel.org Reviewed-by: Brian King Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi/ibmvfc.c | 79 ++++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 8ce301b763ad1e..b3531065a43874 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -160,8 +160,8 @@ static void ibmvfc_npiv_logout(struct ibmvfc_host *); static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *); static void ibmvfc_tgt_move_login(struct ibmvfc_target *); -static void ibmvfc_release_sub_crqs(struct ibmvfc_host *); -static void ibmvfc_init_sub_crqs(struct ibmvfc_host *); +static void ibmvfc_dereg_sub_crqs(struct ibmvfc_host *); +static void ibmvfc_reg_sub_crqs(struct ibmvfc_host *); static const char *unknown_error = "unknown error"; @@ -917,7 +917,7 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost) struct vio_dev *vdev = to_vio_dev(vhost->dev); unsigned long flags; - ibmvfc_release_sub_crqs(vhost); + ibmvfc_dereg_sub_crqs(vhost); /* Re-enable the CRQ */ do { @@ -936,7 +936,7 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost) spin_unlock(vhost->crq.q_lock); spin_unlock_irqrestore(vhost->host->host_lock, flags); - ibmvfc_init_sub_crqs(vhost); + ibmvfc_reg_sub_crqs(vhost); return rc; } @@ -955,7 +955,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost) struct vio_dev *vdev = to_vio_dev(vhost->dev); struct ibmvfc_queue *crq = &vhost->crq; - ibmvfc_release_sub_crqs(vhost); + ibmvfc_dereg_sub_crqs(vhost); /* Close the CRQ */ do { @@ -988,7 +988,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost) spin_unlock(vhost->crq.q_lock); spin_unlock_irqrestore(vhost->host->host_lock, flags); - ibmvfc_init_sub_crqs(vhost); + ibmvfc_reg_sub_crqs(vhost); return rc; } @@ -5757,9 +5757,6 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost, ENTER; - if (ibmvfc_alloc_queue(vhost, scrq, IBMVFC_SUB_CRQ_FMT)) - return -ENOMEM; - rc = h_reg_sub_crq(vdev->unit_address, scrq->msg_token, PAGE_SIZE, &scrq->cookie, &scrq->hw_irq); @@ -5799,7 +5796,6 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost, rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie); } while (rtas_busy_delay(rc)); reg_failed: - ibmvfc_free_queue(vhost, scrq); LEAVE; return rc; } @@ -5825,12 +5821,50 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index) if (rc) dev_err(dev, "Failed to free sub-crq[%d]: rc=%ld\n", index, rc); - ibmvfc_free_queue(vhost, scrq); + /* Clean out the queue */ + memset(scrq->msgs.crq, 0, PAGE_SIZE); + scrq->cur = 0; + + LEAVE; +} + +static void ibmvfc_reg_sub_crqs(struct ibmvfc_host *vhost) +{ + int i, j; + + ENTER; + if (!vhost->mq_enabled || !vhost->scsi_scrqs.scrqs) + return; + + for (i = 0; i < nr_scsi_hw_queues; i++) { + if (ibmvfc_register_scsi_channel(vhost, i)) { + for (j = i; j > 0; j--) + ibmvfc_deregister_scsi_channel(vhost, j - 1); + vhost->do_enquiry = 0; + return; + } + } + + LEAVE; +} + +static void ibmvfc_dereg_sub_crqs(struct ibmvfc_host *vhost) +{ + int i; + + ENTER; + if (!vhost->mq_enabled || !vhost->scsi_scrqs.scrqs) + return; + + for (i = 0; i < nr_scsi_hw_queues; i++) + ibmvfc_deregister_scsi_channel(vhost, i); + LEAVE; } static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost) { + struct ibmvfc_queue *scrq; int i, j; ENTER; @@ -5846,30 +5880,41 @@ static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost) } for (i = 0; i < nr_scsi_hw_queues; i++) { - if (ibmvfc_register_scsi_channel(vhost, i)) { - for (j = i; j > 0; j--) - ibmvfc_deregister_scsi_channel(vhost, j - 1); + scrq = &vhost->scsi_scrqs.scrqs[i]; + if (ibmvfc_alloc_queue(vhost, scrq, IBMVFC_SUB_CRQ_FMT)) { + for (j = i; j > 0; j--) { + scrq = &vhost->scsi_scrqs.scrqs[j - 1]; + ibmvfc_free_queue(vhost, scrq); + } kfree(vhost->scsi_scrqs.scrqs); vhost->scsi_scrqs.scrqs = NULL; vhost->scsi_scrqs.active_queues = 0; vhost->do_enquiry = 0; - break; + vhost->mq_enabled = 0; + return; } } + ibmvfc_reg_sub_crqs(vhost); + LEAVE; } static void ibmvfc_release_sub_crqs(struct ibmvfc_host *vhost) { + struct ibmvfc_queue *scrq; int i; ENTER; if (!vhost->scsi_scrqs.scrqs) return; - for (i = 0; i < nr_scsi_hw_queues; i++) - ibmvfc_deregister_scsi_channel(vhost, i); + ibmvfc_dereg_sub_crqs(vhost); + + for (i = 0; i < nr_scsi_hw_queues; i++) { + scrq = &vhost->scsi_scrqs.scrqs[i]; + ibmvfc_free_queue(vhost, scrq); + } kfree(vhost->scsi_scrqs.scrqs); vhost->scsi_scrqs.scrqs = NULL; From 7df8c497214b32b5de70215ab7897e7844477aa5 Mon Sep 17 00:00:00 2001 From: Chevron Li Date: Thu, 2 Jun 2022 06:25:43 -0700 Subject: [PATCH 0019/1056] mmc: sdhci-pci-o2micro: Fix card detect by dealing with debouncing commit e591fcf6b4e39335c9b128b17738fcd2fdd278ae upstream. The result from ->get_cd() may be incorrect as the card detect debouncing isn't managed correctly. Let's fix it. Signed-off-by: Chevron Li Fixes: 7d44061704dd ("mmc: sdhci-pci-o2micro: Fix O2 Host data read/write DLL Lock phase shift issue") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220602132543.596-1-chevron.li@bayhubtech.com [Ulf: Updated the commit message] Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-o2micro.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index 51d55a87aebeff..059034e832c927 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -147,6 +147,8 @@ static int sdhci_o2_get_cd(struct mmc_host *mmc) if (!(sdhci_readw(host, O2_PLL_DLL_WDT_CONTROL1) & O2_PLL_LOCK_STATUS)) sdhci_o2_enable_internal_clock(host); + else + sdhci_o2_wait_card_detect_stable(host); return !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT); } From fe9ba4f29320c426ec1d3f2c171ef4078a6cd1a1 Mon Sep 17 00:00:00 2001 From: Mengqi Zhang Date: Thu, 9 Jun 2022 19:22:39 +0800 Subject: [PATCH 0020/1056] mmc: mediatek: wait dma stop bit reset to 0 commit 89bcd9a64b849380ef57e3032b307574e48db524 upstream. MediaTek IP requires that after dma stop, it need to wait this dma stop bit auto-reset to 0. When bus is in high loading state, it will take a while for the dma stop complete. If there is no waiting operation here, when program runs to clear fifo and reset, bus will hang. In addition, there should be no return in msdc_data_xfer_next() if there is data need be transferred, because no matter what error occurs here, it should continue to excute to the following mmc_request_done. Otherwise the core layer may wait complete forever. Signed-off-by: Mengqi Zhang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220609112239.18911-1-mengqi.zhang@mediatek.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/mtk-sd.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 1ac92015992ed1..f9b2897569bb49 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -1355,7 +1355,7 @@ static void msdc_data_xfer_next(struct msdc_host *host, struct mmc_request *mrq) msdc_request_done(host, mrq); } -static bool msdc_data_xfer_done(struct msdc_host *host, u32 events, +static void msdc_data_xfer_done(struct msdc_host *host, u32 events, struct mmc_request *mrq, struct mmc_data *data) { struct mmc_command *stop; @@ -1375,7 +1375,7 @@ static bool msdc_data_xfer_done(struct msdc_host *host, u32 events, spin_unlock_irqrestore(&host->lock, flags); if (done) - return true; + return; stop = data->stop; if (check_data || (stop && stop->error)) { @@ -1384,12 +1384,15 @@ static bool msdc_data_xfer_done(struct msdc_host *host, u32 events, sdr_set_field(host->base + MSDC_DMA_CTRL, MSDC_DMA_CTRL_STOP, 1); + ret = readl_poll_timeout_atomic(host->base + MSDC_DMA_CTRL, val, + !(val & MSDC_DMA_CTRL_STOP), 1, 20000); + if (ret) + dev_dbg(host->dev, "DMA stop timed out\n"); + ret = readl_poll_timeout_atomic(host->base + MSDC_DMA_CFG, val, !(val & MSDC_DMA_CFG_STS), 1, 20000); - if (ret) { - dev_dbg(host->dev, "DMA stop timed out\n"); - return false; - } + if (ret) + dev_dbg(host->dev, "DMA inactive timed out\n"); sdr_clr_bits(host->base + MSDC_INTEN, data_ints_mask); dev_dbg(host->dev, "DMA stop\n"); @@ -1414,9 +1417,7 @@ static bool msdc_data_xfer_done(struct msdc_host *host, u32 events, } msdc_data_xfer_next(host, mrq); - done = true; } - return done; } static void msdc_set_buswidth(struct msdc_host *host, u32 width) @@ -2347,6 +2348,9 @@ static void msdc_cqe_disable(struct mmc_host *mmc, bool recovery) if (recovery) { sdr_set_field(host->base + MSDC_DMA_CTRL, MSDC_DMA_CTRL_STOP, 1); + if (WARN_ON(readl_poll_timeout(host->base + MSDC_DMA_CTRL, val, + !(val & MSDC_DMA_CTRL_STOP), 1, 3000))) + return; if (WARN_ON(readl_poll_timeout(host->base + MSDC_DMA_CFG, val, !(val & MSDC_DMA_CFG_STS), 1, 3000))) return; From 87a54feba68f5e47925c8e49100db9b2a8add761 Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Tue, 21 Jun 2022 22:27:26 -0400 Subject: [PATCH 0021/1056] xen/gntdev: Avoid blocking in unmap_grant_pages() commit dbe97cff7dd9f0f75c524afdd55ad46be3d15295 upstream. unmap_grant_pages() currently waits for the pages to no longer be used. In https://github.com/QubesOS/qubes-issues/issues/7481, this lead to a deadlock against i915: i915 was waiting for gntdev's MMU notifier to finish, while gntdev was waiting for i915 to free its pages. I also believe this is responsible for various deadlocks I have experienced in the past. Avoid these problems by making unmap_grant_pages async. This requires making it return void, as any errors will not be available when the function returns. Fortunately, the only use of the return value is a WARN_ON(), which can be replaced by a WARN_ON when the error is detected. Additionally, a failed call will not prevent further calls from being made, but this is harmless. Because unmap_grant_pages is now async, the grant handle will be sent to INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same handle. Instead, a separate bool array is allocated for this purpose. This wastes memory, but stuffing this information in padding bytes is too fragile. Furthermore, it is necessary to grab a reference to the map before making the asynchronous call, and release the reference when the call returns. It is also necessary to guard against reentrancy in gntdev_map_put(), and to handle the case where userspace tries to map a mapping whose contents have not all been freed yet. Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use") Cc: stable@vger.kernel.org Signed-off-by: Demi Marie Obenour Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/xen/gntdev-common.h | 7 ++ drivers/xen/gntdev.c | 157 ++++++++++++++++++++++++------------ 2 files changed, 113 insertions(+), 51 deletions(-) diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h index 20d7d059dadb55..40ef379c28ab01 100644 --- a/drivers/xen/gntdev-common.h +++ b/drivers/xen/gntdev-common.h @@ -16,6 +16,7 @@ #include #include #include +#include struct gntdev_dmabuf_priv; @@ -56,6 +57,7 @@ struct gntdev_grant_map { struct gnttab_unmap_grant_ref *unmap_ops; struct gnttab_map_grant_ref *kmap_ops; struct gnttab_unmap_grant_ref *kunmap_ops; + bool *being_removed; struct page **pages; unsigned long pages_vm_start; @@ -73,6 +75,11 @@ struct gntdev_grant_map { /* Needed to avoid allocation in gnttab_dma_free_pages(). */ xen_pfn_t *frames; #endif + + /* Number of live grants */ + atomic_t live_grants; + /* Needed to avoid allocation in __unmap_grant_pages */ + struct gntab_unmap_queue_data unmap_data; }; struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 59ffea80007914..4b56c39f766d4d 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -60,10 +61,11 @@ module_param(limit, uint, 0644); MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by one mapping request"); +/* True in PV mode, false otherwise */ static int use_ptemod; -static int unmap_grant_pages(struct gntdev_grant_map *map, - int offset, int pages); +static void unmap_grant_pages(struct gntdev_grant_map *map, + int offset, int pages); static struct miscdevice gntdev_miscdev; @@ -120,6 +122,7 @@ static void gntdev_free_map(struct gntdev_grant_map *map) kvfree(map->unmap_ops); kvfree(map->kmap_ops); kvfree(map->kunmap_ops); + kvfree(map->being_removed); kfree(map); } @@ -140,10 +143,13 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]), GFP_KERNEL); add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); + add->being_removed = + kvcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL); if (NULL == add->grants || NULL == add->map_ops || NULL == add->unmap_ops || - NULL == add->pages) + NULL == add->pages || + NULL == add->being_removed) goto err; if (use_ptemod) { add->kmap_ops = kvmalloc_array(count, sizeof(add->kmap_ops[0]), @@ -250,9 +256,36 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) if (!refcount_dec_and_test(&map->users)) return; - if (map->pages && !use_ptemod) + if (map->pages && !use_ptemod) { + /* + * Increment the reference count. This ensures that the + * subsequent call to unmap_grant_pages() will not wind up + * re-entering itself. It *can* wind up calling + * gntdev_put_map() recursively, but such calls will be with a + * reference count greater than 1, so they will return before + * this code is reached. The recursion depth is thus limited to + * 1. Do NOT use refcount_inc() here, as it will detect that + * the reference count is zero and WARN(). + */ + refcount_set(&map->users, 1); + + /* + * Unmap the grants. This may or may not be asynchronous, so it + * is possible that the reference count is 1 on return, but it + * could also be greater than 1. + */ unmap_grant_pages(map, 0, map->count); + /* Check if the memory now needs to be freed */ + if (!refcount_dec_and_test(&map->users)) + return; + + /* + * All pages have been returned to the hypervisor, so free the + * map. + */ + } + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(map->notify.event); evtchn_put(map->notify.event); @@ -283,6 +316,7 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) int gntdev_map_grant_pages(struct gntdev_grant_map *map) { + size_t alloced = 0; int i, err = 0; if (!use_ptemod) { @@ -331,97 +365,116 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) map->count); for (i = 0; i < map->count; i++) { - if (map->map_ops[i].status == GNTST_okay) + if (map->map_ops[i].status == GNTST_okay) { map->unmap_ops[i].handle = map->map_ops[i].handle; - else if (!err) + if (!use_ptemod) + alloced++; + } else if (!err) err = -EINVAL; if (map->flags & GNTMAP_device_map) map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr; if (use_ptemod) { - if (map->kmap_ops[i].status == GNTST_okay) + if (map->kmap_ops[i].status == GNTST_okay) { + if (map->map_ops[i].status == GNTST_okay) + alloced++; map->kunmap_ops[i].handle = map->kmap_ops[i].handle; - else if (!err) + } else if (!err) err = -EINVAL; } } + atomic_add(alloced, &map->live_grants); return err; } -static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset, - int pages) +static void __unmap_grant_pages_done(int result, + struct gntab_unmap_queue_data *data) { - int i, err = 0; - struct gntab_unmap_queue_data unmap_data; - - if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { - int pgno = (map->notify.addr >> PAGE_SHIFT); - if (pgno >= offset && pgno < offset + pages) { - /* No need for kmap, pages are in lowmem */ - uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno])); - tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; - map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; - } - } - - unmap_data.unmap_ops = map->unmap_ops + offset; - unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; - unmap_data.pages = map->pages + offset; - unmap_data.count = pages; - - err = gnttab_unmap_refs_sync(&unmap_data); - if (err) - return err; + unsigned int i; + struct gntdev_grant_map *map = data->data; + unsigned int offset = data->unmap_ops - map->unmap_ops; - for (i = 0; i < pages; i++) { - if (map->unmap_ops[offset+i].status) - err = -EINVAL; + for (i = 0; i < data->count; i++) { + WARN_ON(map->unmap_ops[offset+i].status); pr_debug("unmap handle=%d st=%d\n", map->unmap_ops[offset+i].handle, map->unmap_ops[offset+i].status); map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; if (use_ptemod) { - if (map->kunmap_ops[offset+i].status) - err = -EINVAL; + WARN_ON(map->kunmap_ops[offset+i].status); pr_debug("kunmap handle=%u st=%d\n", map->kunmap_ops[offset+i].handle, map->kunmap_ops[offset+i].status); map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; } } - return err; + /* + * Decrease the live-grant counter. This must happen after the loop to + * prevent premature reuse of the grants by gnttab_mmap(). + */ + atomic_sub(data->count, &map->live_grants); + + /* Release reference taken by __unmap_grant_pages */ + gntdev_put_map(NULL, map); +} + +static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset, + int pages) +{ + if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { + int pgno = (map->notify.addr >> PAGE_SHIFT); + + if (pgno >= offset && pgno < offset + pages) { + /* No need for kmap, pages are in lowmem */ + uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno])); + + tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; + map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; + } + } + + map->unmap_data.unmap_ops = map->unmap_ops + offset; + map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; + map->unmap_data.pages = map->pages + offset; + map->unmap_data.count = pages; + map->unmap_data.done = __unmap_grant_pages_done; + map->unmap_data.data = map; + refcount_inc(&map->users); /* to keep map alive during async call below */ + + gnttab_unmap_refs_async(&map->unmap_data); } -static int unmap_grant_pages(struct gntdev_grant_map *map, int offset, - int pages) +static void unmap_grant_pages(struct gntdev_grant_map *map, int offset, + int pages) { - int range, err = 0; + int range; + + if (atomic_read(&map->live_grants) == 0) + return; /* Nothing to do */ pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages); /* It is possible the requested range will have a "hole" where we * already unmapped some of the grants. Only unmap valid ranges. */ - while (pages && !err) { - while (pages && - map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) { + while (pages) { + while (pages && map->being_removed[offset]) { offset++; pages--; } range = 0; while (range < pages) { - if (map->unmap_ops[offset + range].handle == - INVALID_GRANT_HANDLE) + if (map->being_removed[offset + range]) break; + map->being_removed[offset + range] = true; range++; } - err = __unmap_grant_pages(map, offset, range); + if (range) + __unmap_grant_pages(map, offset, range); offset += range; pages -= range; } - - return err; } /* ------------------------------------------------------------------ */ @@ -473,7 +526,6 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn, struct gntdev_grant_map *map = container_of(mn, struct gntdev_grant_map, notifier); unsigned long mstart, mend; - int err; if (!mmu_notifier_range_blockable(range)) return false; @@ -494,10 +546,9 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn, map->index, map->count, map->vma->vm_start, map->vma->vm_end, range->start, range->end, mstart, mend); - err = unmap_grant_pages(map, + unmap_grant_pages(map, (mstart - map->vma->vm_start) >> PAGE_SHIFT, (mend - mstart) >> PAGE_SHIFT); - WARN_ON(err); return true; } @@ -985,6 +1036,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) goto unlock_out; if (use_ptemod && map->vma) goto unlock_out; + if (atomic_read(&map->live_grants)) { + err = -EAGAIN; + goto unlock_out; + } refcount_inc(&map->users); vma->vm_ops = &gntdev_vmops; From 0f6f66b4ef2701079716859ec272c7e5a8de3d4f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 24 Jun 2022 14:51:39 +0200 Subject: [PATCH 0022/1056] MAINTAINERS: Add new IOMMU development mailing list commit c242507c1b895646b4a25060df13b6214805759f upstream. The IOMMU mailing list will move from lists.linux-foundation.org to lists.linux.dev. The hard switch of the archive will happen on July 5th, but add the new list now already so that people start using the list when sending patches. After July 5th the old list will disappear. Cc: stable@vger.kernel.org Signed-off-by: Joerg Roedel Link: https://lore.kernel.org/r/20220624125139.412-1-joro@8bytes.org Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 942e0b173f2cc6..393706e85ba27a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -434,6 +434,7 @@ ACPI VIOT DRIVER M: Jean-Philippe Brucker L: linux-acpi@vger.kernel.org L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Maintained F: drivers/acpi/viot.c F: include/linux/acpi_viot.h @@ -941,6 +942,7 @@ AMD IOMMU (AMD-VI) M: Joerg Roedel R: Suravee Suthikulpanit L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/amd/ @@ -5602,6 +5604,7 @@ M: Christoph Hellwig M: Marek Szyprowski R: Robin Murphy L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Supported W: http://git.infradead.org/users/hch/dma-mapping.git T: git git://git.infradead.org/users/hch/dma-mapping.git @@ -5614,6 +5617,7 @@ F: kernel/dma/ DMA MAPPING BENCHMARK M: Barry Song L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev F: kernel/dma/map_benchmark.c F: tools/testing/selftests/dma/ @@ -7115,6 +7119,7 @@ F: drivers/gpu/drm/exynos/exynos_dp* EXYNOS SYSMMU (IOMMU) driver M: Marek Szyprowski L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Maintained F: drivers/iommu/exynos-iommu.c @@ -9457,6 +9462,7 @@ INTEL IOMMU (VT-d) M: David Woodhouse M: Lu Baolu L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/intel/ @@ -9793,6 +9799,7 @@ IOMMU DRIVERS M: Joerg Roedel M: Will Deacon L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: Documentation/devicetree/bindings/iommu/ @@ -11795,6 +11802,7 @@ F: drivers/i2c/busses/i2c-mt65xx.c MEDIATEK IOMMU DRIVER M: Yong Wu L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) S: Supported F: Documentation/devicetree/bindings/iommu/mediatek* @@ -15554,6 +15562,7 @@ F: drivers/i2c/busses/i2c-qcom-cci.c QUALCOMM IOMMU M: Rob Clark L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev L: linux-arm-msm@vger.kernel.org S: Maintained F: drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -17982,6 +17991,7 @@ F: arch/x86/boot/video* SWIOTLB SUBSYSTEM M: Christoph Hellwig L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Supported W: http://git.infradead.org/users/hch/dma-mapping.git T: git git://git.infradead.org/users/hch/dma-mapping.git @@ -20562,6 +20572,7 @@ M: Juergen Gross M: Stefano Stabellini L: xen-devel@lists.xenproject.org (moderated for non-subscribers) L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Supported F: arch/x86/xen/*swiotlb* F: drivers/xen/*swiotlb* From 0af674e7a764563496480c1e30fadf0048325978 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 14 Jun 2022 10:31:38 +0200 Subject: [PATCH 0023/1056] mtd: rawnand: gpmi: Fix setting busy timeout setting commit 06781a5026350cde699d2d10c9914a25c1524f45 upstream. The DEVICE_BUSY_TIMEOUT value is described in the Reference Manual as: | Timeout waiting for NAND Ready/Busy or ATA IRQ. Used in WAIT_FOR_READY | mode. This value is the number of GPMI_CLK cycles multiplied by 4096. So instead of multiplying the value in cycles with 4096, we have to divide it by that value. Use DIV_ROUND_UP to make sure we are on the safe side, especially when the calculated value in cycles is smaller than 4096 as typically the case. This bug likely never triggered because any timeout != 0 usually will do. In my case the busy timeout in cycles was originally calculated as 2408, which multiplied with 4096 is 0x968000. The lower 16 bits were taken for the 16 bit wide register field, so the register value was 0x8000. With 2970bf5a32f0 ("mtd: rawnand: gpmi: fix controller timings setting") however the value in cycles became 2384, which multiplied with 4096 is 0x950000. The lower 16 bit are 0x0 now resulting in an intermediate timeout when reading from NAND. Fixes: b1206122069aa ("mtd: rawnand: gpmi: use core timings instead of an empirical derivation") Cc: stable@vger.kernel.org Signed-off-by: Sascha Hauer Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220614083138.3455683-1-s.hauer@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index b72b387c08ef78..6eb0677777037f 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -685,7 +685,7 @@ static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this, hw->timing0 = BF_GPMI_TIMING0_ADDRESS_SETUP(addr_setup_cycles) | BF_GPMI_TIMING0_DATA_HOLD(data_hold_cycles) | BF_GPMI_TIMING0_DATA_SETUP(data_setup_cycles); - hw->timing1 = BF_GPMI_TIMING1_BUSY_TIMEOUT(busy_timeout_cycles * 4096); + hw->timing1 = BF_GPMI_TIMING1_BUSY_TIMEOUT(DIV_ROUND_UP(busy_timeout_cycles, 4096)); /* * Derive NFC ideal delay from {3}: From c477de4c7d4374b7a2bed7b4b4792126dbd543c8 Mon Sep 17 00:00:00 2001 From: Edward Wu Date: Fri, 17 Jun 2022 11:32:20 +0800 Subject: [PATCH 0024/1056] ata: libata: add qc->flags in ata_qc_complete_template tracepoint commit 540a92bfe6dab7310b9df2e488ba247d784d0163 upstream. Add flags value to check the result of ata completion Fixes: 255c03d15a29 ("libata: Add tracepoints") Cc: stable@vger.kernel.org Signed-off-by: Edward Wu Signed-off-by: Damien Le Moal Signed-off-by: Greg Kroah-Hartman --- include/trace/events/libata.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index ab69434e2329e0..72e785a903b656 100644 --- a/include/trace/events/libata.h +++ b/include/trace/events/libata.h @@ -249,6 +249,7 @@ DECLARE_EVENT_CLASS(ata_qc_complete_template, __entry->hob_feature = qc->result_tf.hob_feature; __entry->nsect = qc->result_tf.nsect; __entry->hob_nsect = qc->result_tf.hob_nsect; + __entry->flags = qc->flags; ), TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s status=%s " \ From ac0a5f701f4d5549e494bc36276aebb7dc480dab Mon Sep 17 00:00:00 2001 From: Nikos Tsironis Date: Tue, 21 Jun 2022 15:24:03 +0300 Subject: [PATCH 0025/1056] dm era: commit metadata in postsuspend after worker stops commit 9ae6e8b1c9bbf6874163d1243e393137313762b7 upstream. During postsuspend dm-era does the following: 1. Archives the current era 2. Commits the metadata, as part of the RPC call for archiving the current era 3. Stops the worker Until the worker stops, it might write to the metadata again. Moreover, these writes are not flushed to disk immediately, but are cached by the dm-bufio client, which writes them back asynchronously. As a result, the committed metadata of a suspended dm-era device might not be consistent with the in-core metadata. In some cases, this can result in the corruption of the on-disk metadata. Suppose the following sequence of events: 1. Load a new table, e.g. a snapshot-origin table, to a device with a dm-era table 2. Suspend the device 3. dm-era commits its metadata, but the worker does a few more metadata writes until it stops, as part of digesting an archived writeset 4. These writes are cached by the dm-bufio client 5. Load the dm-era table to another device. 6. The new instance of the dm-era target loads the committed, on-disk metadata, which don't include the extra writes done by the worker after the metadata commit. 7. Resume the new device 8. The new dm-era target instance starts using the metadata 9. Resume the original device 10. The destructor of the old dm-era target instance is called and destroys the dm-bufio client, which results in flushing the cached writes to disk 11. These writes might overwrite the writes done by the new dm-era instance, hence corrupting its metadata. Fix this by committing the metadata after the worker stops running. stop_worker uses flush_workqueue to flush the current work. However, the work item may re-queue itself and flush_workqueue doesn't wait for re-queued works to finish. This could result in the worker changing the metadata after they have been committed, or writing to the metadata concurrently with the commit in the postsuspend thread. Use drain_workqueue instead, which waits until the work and all re-queued works finish. Fixes: eec40579d8487 ("dm: add era target") Cc: stable@vger.kernel.org # v3.15+ Signed-off-by: Nikos Tsironis Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-era-target.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index 2a78f687414317..a56df453660594 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1400,7 +1400,7 @@ static void start_worker(struct era *era) static void stop_worker(struct era *era) { atomic_set(&era->suspended, 1); - flush_workqueue(era->wq); + drain_workqueue(era->wq); } /*---------------------------------------------------------------- @@ -1570,6 +1570,12 @@ static void era_postsuspend(struct dm_target *ti) } stop_worker(era); + + r = metadata_commit(era->md); + if (r) { + DMERR("%s: metadata_commit failed", __func__); + /* FIXME: fail mode */ + } } static int era_preresume(struct dm_target *ti) From c3f51b28a8bcbb4ca92e2cec578ce4523f471ab4 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 23 Jun 2022 14:53:25 -0400 Subject: [PATCH 0026/1056] dm mirror log: clear log bits up to BITS_PER_LONG boundary commit 90736eb3232d208ee048493f371075e4272e0944 upstream. Commit 85e123c27d5c ("dm mirror log: round up region bitmap size to BITS_PER_LONG") introduced a regression on 64-bit architectures in the lvm testsuite tests: lvcreate-mirror, mirror-names and vgsplit-operation. If the device is shrunk, we need to clear log bits beyond the end of the device. The code clears bits up to a 32-bit boundary and then calculates lc->sync_count by summing set bits up to a 64-bit boundary (the commit changed that; previously, this boundary was 32-bit too). So, it was using some non-zeroed bits in the calculation and this caused misbehavior. Fix this regression by clearing bits up to BITS_PER_LONG boundary. Fixes: 85e123c27d5c ("dm mirror log: round up region bitmap size to BITS_PER_LONG") Cc: stable@vger.kernel.org Reported-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index ccdd65148498f9..b40741bedfd43d 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -615,7 +615,7 @@ static int disk_resume(struct dm_dirty_log *log) log_clear_bit(lc, lc->clean_bits, i); /* clear any old bits -- device has shrunk */ - for (i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++) + for (i = lc->region_count; i % BITS_PER_LONG; i++) log_clear_bit(lc, lc->clean_bits, i); /* copy clean across to sync */ From bae4d6a2dd9ecc537e864814629bbae43c073dce Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sat, 28 May 2022 00:55:39 +0900 Subject: [PATCH 0027/1056] tracing/kprobes: Check whether get_kretprobe() returns NULL in kretprobe_dispatcher() commit cc72b72073ac982a954d3b43519ca1c28f03c27c upstream. There is a small chance that get_kretprobe(ri) returns NULL in kretprobe_dispatcher() when another CPU unregisters the kretprobe right after __kretprobe_trampoline_handler(). To avoid this issue, kretprobe_dispatcher() checks the get_kretprobe() return value again. And if it is NULL, it returns soon because that kretprobe is under unregistering process. This issue has been introduced when the kretprobe is decoupled from the struct kretprobe_instance by commit d741bf41d7c7 ("kprobes: Remove kretprobe hash"). Before that commit, the struct kretprob_instance::rp directly points the kretprobe and it is never be NULL. Link: https://lkml.kernel.org/r/165366693881.797669.16926184644089588731.stgit@devnote2 Reported-by: Yonghong Song Fixes: d741bf41d7c7 ("kprobes: Remove kretprobe hash") Cc: Peter Zijlstra Cc: Ingo Molnar Cc: bpf Cc: Kernel Team Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) Acked-by: Jiri Olsa Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_kprobe.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 39ee60725519bb..6a9c1ef15d5d8e 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1733,8 +1733,17 @@ static int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) { struct kretprobe *rp = get_kretprobe(ri); - struct trace_kprobe *tk = container_of(rp, struct trace_kprobe, rp); + struct trace_kprobe *tk; + + /* + * There is a small chance that get_kretprobe(ri) returns NULL when + * the kretprobe is unregister on another CPU between kretprobe's + * trampoline_handler and this function. + */ + if (unlikely(!rp)) + return 0; + tk = container_of(rp, struct trace_kprobe, rp); raw_cpu_inc(*tk->nhit); if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE)) From 0895a2235bae6671077c2de94268cccc346005d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 13 Jun 2022 23:14:39 +0300 Subject: [PATCH 0028/1056] drm/i915: Implement w/a 22010492432 for adl-s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 13bd259b64bb58ae130923ada42ebc19bf3f2fa2 upstream. adl-s needs the combo PLL DCO fraction w/a as well. Gets us slightly more accurate clock out of the PLL. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220613201439.23341-1-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper (cherry picked from commit d36bdd77b9e6aa7f5cb7b0f11ebbab8e5febf10b) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 5c91d125a3371d..3dfa600fb86d68 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2434,7 +2434,7 @@ static void icl_wrpll_params_populate(struct skl_wrpll_params *params, } /* - * Display WA #22010492432: ehl, tgl, adl-p + * Display WA #22010492432: ehl, tgl, adl-s, adl-p * Program half of the nominal DCO divider fraction value. */ static bool @@ -2442,7 +2442,7 @@ ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915) { return ((IS_PLATFORM(i915, INTEL_ELKHARTLAKE) && IS_JSL_EHL_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) || - IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) && + IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) && i915->dpll.ref_clks.nssc == 38400; } From e5b0f42edda25516d53f258b029cd8029115efca Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 21 Jun 2022 10:58:55 +0200 Subject: [PATCH 0029/1056] USB: serial: pl2303: add support for more HXN (G) types commit ae60aac59a9ad8ab64a4b07de509a534a75b6bac upstream. Add support for further HXN (G) type devices (GT variant, GL variant, GS variant and GR) and document the bcdDevice mapping. Note that the TA and TB types use the same bcdDevice as some GT and GE variants, respectively, but that the HX status request can be used to determine which is which. Also note that we currently do not distinguish between the various HXN (G) types in the driver but that this may change eventually (e.g. when adding GPIO support). Reported-by: Charles Yeh Link: https://lore.kernel.org/r/YrF77b9DdeumUAee@hovoldconsulting.com Cc: stable@vger.kernel.org # 5.13 Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 3506c47e1eef02..40b1ab3d284dcb 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -436,22 +436,27 @@ static int pl2303_detect_type(struct usb_serial *serial) break; case 0x200: switch (bcdDevice) { - case 0x100: + case 0x100: /* GC */ case 0x105: + return TYPE_HXN; + case 0x300: /* GT / TA */ + if (pl2303_supports_hx_status(serial)) + return TYPE_TA; + fallthrough; case 0x305: + case 0x400: /* GL */ case 0x405: + return TYPE_HXN; + case 0x500: /* GE / TB */ + if (pl2303_supports_hx_status(serial)) + return TYPE_TB; + fallthrough; + case 0x505: + case 0x600: /* GS */ case 0x605: - /* - * Assume it's an HXN-type if the device doesn't - * support the old read request value. - */ - if (!pl2303_supports_hx_status(serial)) - return TYPE_HXN; - break; - case 0x300: - return TYPE_TA; - case 0x500: - return TYPE_TB; + case 0x700: /* GR */ + case 0x705: + return TYPE_HXN; } break; } From b919ece13b6b9db994ad4a747df64ee1f0ae3674 Mon Sep 17 00:00:00 2001 From: Carlo Lobrano Date: Tue, 14 Jun 2022 09:56:23 +0200 Subject: [PATCH 0030/1056] USB: serial: option: add Telit LE910Cx 0x1250 composition commit 342fc0c3b345525da21112bd0478a0dc741598ea upstream. Add support for the following Telit LE910Cx composition: 0x1250: rmnet, tty, tty, tty, tty Reviewed-by: Daniele Palmas Signed-off-by: Carlo Lobrano Link: https://lore.kernel.org/r/20220614075623.2392607-1-c.lobrano@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ed1e50d83ccab1..222b1e3d45a6f9 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1279,6 +1279,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1231, 0xff), /* Telit LE910Cx (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x1250, 0xff, 0x00, 0x00) }, /* Telit LE910Cx (rmnet) */ { USB_DEVICE(TELIT_VENDOR_ID, 0x1260), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1261), From d5eb7d6baed5d725128a88f75d3ff9806418eb20 Mon Sep 17 00:00:00 2001 From: Yonglin Tan Date: Tue, 21 Jun 2022 20:37:53 +0800 Subject: [PATCH 0031/1056] USB: serial: option: add Quectel EM05-G modem commit 33b29dbb39bcbd0a96e440646396bbf670b914fa upstream. The EM05-G modem has 2 USB configurations that are configurable via the AT command AT+QCFG="usbnet",[ 0 | 2 ] which make the modem enumerate with the following interfaces, respectively: "RMNET" : AT + DIAG + NMEA + Modem + QMI "MBIM" : MBIM + AT + DIAG + NMEA + Modem The detailed description of the USB configuration for each mode as follows: RMNET Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 21 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=030a Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms MBIM Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=030a Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Yonglin Tan Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 222b1e3d45a6f9..61d34886c70664 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -252,6 +252,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EG95 0x0195 #define QUECTEL_PRODUCT_BG96 0x0296 #define QUECTEL_PRODUCT_EP06 0x0306 +#define QUECTEL_PRODUCT_EM05G 0x030a #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_EC200S_CN 0x6002 @@ -1134,6 +1135,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, From 45dc151ca0b9a03a408e77377840f42db004fbf9 Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Thu, 23 Jun 2022 16:56:44 +0800 Subject: [PATCH 0032/1056] USB: serial: option: add Quectel RM500K module support commit 15b694e96c31807d8515aacfa687a1e8a4fbbadc upstream. Add usb product id of the Quectel RM500K module. RM500K provides 2 mandatory interfaces to Linux host after enumeration. - /dev/ttyUSB5: this is a serial interface for control path. User needs to write AT commands to this device node to query status, set APN, set PIN code, and enable/disable the data connection to 5G network. - ethX: this is the data path provided as a RNDIS devices. After the data connection has been established, Linux host can access 5G data network via this interface. "RNDIS": RNDIS + ADB + AT (/dev/ttyUSB5) + MODEM COMs usb-devices output for 0x7001: T: Bus=05 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=7001 Rev=00.01 S: Manufacturer=MediaTek Inc. S: Product=USB DATA CARD S: SerialNumber=869206050009672 C: #Ifs=10 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=02 Prot=ff Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=125us I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=08(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 9 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=09(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Co-developed-by: Ballon Shi Signed-off-by: Ballon Shi Signed-off-by: Macpaul Lin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 61d34886c70664..de59fa919540a3 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -257,6 +257,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_EC200S_CN 0x6002 #define QUECTEL_PRODUCT_EC200T 0x6026 +#define QUECTEL_PRODUCT_RM500K 0x7001 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -1150,6 +1151,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, From c6f6c966860997e5bbeae7c99cd8985a9717e56b Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 31 May 2022 13:08:56 -0700 Subject: [PATCH 0033/1056] drm/msm: Ensure mmap offset is initialized [ Upstream commit 036d20726c30267724416e966c9f92db07de8081 ] If a GEM object is allocated, and then exported as a dma-buf fd which is mmap'd before or without the GEM buffer being directly mmap'd, the vma_node could be unitialized. This leads to a situation where the CPU mapping is not correctly torn down in drm_vma_node_unmap(). Fixes: e5516553999f ("drm: call drm_gem_object_funcs.mmap with fake offset") Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220531200857.136547-1-robdclark@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/msm_drv.c | 2 +- drivers/gpu/drm/msm/msm_drv.h | 1 + drivers/gpu/drm/msm/msm_gem_prime.c | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 9712582886aac6..916361c30d7745 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -1102,7 +1102,7 @@ static const struct drm_driver msm_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import_sg_table = msm_gem_prime_import_sg_table, - .gem_prime_mmap = drm_gem_prime_mmap, + .gem_prime_mmap = msm_gem_prime_mmap, #ifdef CONFIG_DEBUG_FS .debugfs_init = msm_debugfs_init, #endif diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index c552f0c3890c1c..bd5132bb9bde27 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -298,6 +298,7 @@ unsigned long msm_gem_shrinker_shrink(struct drm_device *dev, unsigned long nr_t void msm_gem_shrinker_init(struct drm_device *dev); void msm_gem_shrinker_cleanup(struct drm_device *dev); +int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj); int msm_gem_prime_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map); diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index 8a2d94bd5df289..02c70a0b2a036b 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -11,6 +11,21 @@ #include "msm_drv.h" #include "msm_gem.h" +int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + int ret; + + /* Ensure the mmap offset is initialized. We lazily initialize it, + * so if it has not been first mmap'd directly as a GEM object, the + * mmap offset will not be already initialized. + */ + ret = drm_gem_create_mmap_offset(obj); + if (ret) + return ret; + + return drm_gem_prime_mmap(obj, vma); +} + struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); From f7fa3263079c55e2bbdcd5ff763c19b665e329ed Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Mon, 6 Jun 2022 23:13:05 +0200 Subject: [PATCH 0034/1056] drm/msm: Fix double pm_runtime_disable() call [ Upstream commit ce0db505bc0c51ef5e9ba446c660de7e26f78f29 ] Following commit 17e822f7591f ("drm/msm: fix unbalanced pm_runtime_enable in adreno_gpu_{init, cleanup}"), any call to adreno_unbind() will disable runtime PM twice, as indicated by the call trees below: adreno_unbind() -> pm_runtime_force_suspend() -> pm_runtime_disable() adreno_unbind() -> gpu->funcs->destroy() [= aNxx_destroy()] -> adreno_gpu_cleanup() -> pm_runtime_disable() Note that pm_runtime_force_suspend() is called right before gpu->funcs->destroy() and both functions are called unconditionally. With recent addition of the eDP AUX bus code, this problem manifests itself when the eDP panel cannot be found yet and probing is deferred. On the first probe attempt, we disable runtime PM twice as described above. This then causes any later probe attempt to fail with [drm:adreno_load_gpu [msm]] *ERROR* Couldn't power up the GPU: -13 preventing the driver from loading. As there seem to be scenarios where the aNxx_destroy() functions are not called from adreno_unbind(), simply removing pm_runtime_disable() from inside adreno_unbind() does not seem to be the proper fix. This is what commit 17e822f7591f ("drm/msm: fix unbalanced pm_runtime_enable in adreno_gpu_{init, cleanup}") intended to fix. Therefore, instead check whether runtime PM is still enabled, and only disable it in that case. Fixes: 17e822f7591f ("drm/msm: fix unbalanced pm_runtime_enable in adreno_gpu_{init, cleanup}") Signed-off-by: Maximilian Luz Tested-by: Bjorn Andersson Reviewed-by: Rob Clark Link: https://lore.kernel.org/r/20220606211305.189585-1-luzmaximilian@gmail.com Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 748665232d2968..bba68776cb25db 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -958,7 +958,8 @@ void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++) release_firmware(adreno_gpu->fw[i]); - pm_runtime_disable(&priv->gpu_pdev->dev); + if (pm_runtime_enabled(&priv->gpu_pdev->dev)) + pm_runtime_disable(&priv->gpu_pdev->dev); msm_gpu_cleanup(&adreno_gpu->base); } From d0906b0fffc9f19bc42708ca3e84e2089088386c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 May 2022 20:15:31 +0200 Subject: [PATCH 0035/1056] netfilter: use get_random_u32 instead of prandom [ Upstream commit b1fd94e704571f98b21027340eecf821b2bdffba ] bh might occur while updating per-cpu rnd_state from user context, ie. local_out path. BUG: using smp_processor_id() in preemptible [00000000] code: nginx/2725 caller is nft_ng_random_eval+0x24/0x54 [nft_numgen] Call Trace: check_preemption_disabled+0xde/0xe0 nft_ng_random_eval+0x24/0x54 [nft_numgen] Use the random driver instead, this also avoids need for local prandom state. Moreover, prandom now uses the random driver since d4150779e60f ("random32: use real rng for non-deterministic randomness"). Based on earlier patch from Pablo Neira. Fixes: 6b2faee0ca91 ("netfilter: nft_meta: place prandom handling in a helper") Fixes: 978d8f9055c3 ("netfilter: nft_numgen: add map lookups for numgen random operations") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_meta.c | 13 ++----------- net/netfilter/nft_numgen.c | 12 +++--------- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index a7e01e9952f171..44d9b38e5f90c5 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -32,8 +33,6 @@ #define NFT_META_SECS_PER_DAY 86400 #define NFT_META_DAYS_PER_WEEK 7 -static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state); - static u8 nft_meta_weekday(void) { time64_t secs = ktime_get_real_seconds(); @@ -267,13 +266,6 @@ static bool nft_meta_get_eval_ifname(enum nft_meta_keys key, u32 *dest, return true; } -static noinline u32 nft_prandom_u32(void) -{ - struct rnd_state *state = this_cpu_ptr(&nft_prandom_state); - - return prandom_u32_state(state); -} - #ifdef CONFIG_IP_ROUTE_CLASSID static noinline bool nft_meta_get_eval_rtclassid(const struct sk_buff *skb, u32 *dest) @@ -385,7 +377,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, break; #endif case NFT_META_PRANDOM: - *dest = nft_prandom_u32(); + *dest = get_random_u32(); break; #ifdef CONFIG_XFRM case NFT_META_SECPATH: @@ -514,7 +506,6 @@ int nft_meta_get_init(const struct nft_ctx *ctx, len = IFNAMSIZ; break; case NFT_META_PRANDOM: - prandom_init_once(&nft_prandom_state); len = sizeof(u32); break; #ifdef CONFIG_XFRM diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 722cac1e90e0e3..4e43214e88def7 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -9,12 +9,11 @@ #include #include #include +#include #include #include #include -static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state); - struct nft_ng_inc { u8 dreg; u32 modulus; @@ -104,12 +103,9 @@ struct nft_ng_random { u32 offset; }; -static u32 nft_ng_random_gen(struct nft_ng_random *priv) +static u32 nft_ng_random_gen(const struct nft_ng_random *priv) { - struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state); - - return reciprocal_scale(prandom_u32_state(state), priv->modulus) + - priv->offset; + return reciprocal_scale(get_random_u32(), priv->modulus) + priv->offset; } static void nft_ng_random_eval(const struct nft_expr *expr, @@ -137,8 +133,6 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; - prandom_init_once(&nft_numgen_prandom_state); - return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, sizeof(u32)); } From 7d5fe94333a907a3422b4ddf8501fa1d2abdc26c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 8 Jun 2022 10:13:02 +0900 Subject: [PATCH 0036/1056] scsi: scsi_debug: Fix zone transition to full condition [ Upstream commit 566d3c57eb526f32951af15866086e236ce1fc8a ] When a write command to a sequential write required or sequential write preferred zone result in the zone write pointer reaching the end of the zone, the zone condition must be set to full AND the number of implicitly or explicitly open zones updated to have a correct accounting for zone resources. However, the function zbc_inc_wp() only sets the zone condition to full without updating the open zone counters, resulting in a zone state machine breakage. Introduce the helper function zbc_set_zone_full() and use it in zbc_inc_wp() to correctly transition zones to the full condition. Link: https://lore.kernel.org/r/20220608011302.92061-1-damien.lemoal@opensource.wdc.com Fixes: f0d1cf9378bd ("scsi: scsi_debug: Add ZBC zone commands") Reviewed-by: Niklas Cassel Acked-by: Douglas Gilbert Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_debug.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index cfeadd5f61f183..747e1cbb7ec914 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -2747,6 +2747,24 @@ static void zbc_open_zone(struct sdebug_dev_info *devip, } } +static inline void zbc_set_zone_full(struct sdebug_dev_info *devip, + struct sdeb_zone_state *zsp) +{ + switch (zsp->z_cond) { + case ZC2_IMPLICIT_OPEN: + devip->nr_imp_open--; + break; + case ZC3_EXPLICIT_OPEN: + devip->nr_exp_open--; + break; + default: + WARN_ONCE(true, "Invalid zone %llu condition %x\n", + zsp->z_start, zsp->z_cond); + break; + } + zsp->z_cond = ZC5_FULL; +} + static void zbc_inc_wp(struct sdebug_dev_info *devip, unsigned long long lba, unsigned int num) { @@ -2759,7 +2777,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip, if (zsp->z_type == ZBC_ZONE_TYPE_SWR) { zsp->z_wp += num; if (zsp->z_wp >= zend) - zsp->z_cond = ZC5_FULL; + zbc_set_zone_full(devip, zsp); return; } @@ -2778,7 +2796,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip, n = num; } if (zsp->z_wp >= zend) - zsp->z_cond = ZC5_FULL; + zbc_set_zone_full(devip, zsp); num -= n; lba += n; From 0a7a5261705fceded5592316ac92b4f964296c74 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 8 Jun 2022 09:13:34 -0700 Subject: [PATCH 0037/1056] drm/msm: Switch ordering of runpm put vs devfreq_idle [ Upstream commit 49e477610087a02c3604061b8f3ee3a25a493987 ] In msm_devfreq_suspend() we cancel idle_work synchronously so that it doesn't run after we power of the hw or in the resume path. But this means that we want to ensure that idle_work is not scheduled *after* we no longer hold a runpm ref. So switch the ordering of pm_runtime_put() vs msm_devfreq_idle(). v2. Only move the runpm _put_autosuspend, and not the _mark_last_busy() Fixes: 9bc95570175a ("drm/msm: Devfreq tuning") Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20210927152928.831245-1-robdclark@gmail.com Reviewed-by: Akhil P Oommen Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20220608161334.2140611-1-robdclark@gmail.com Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/msm_gpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 2c46cd968ac4c7..b01d0a521c9081 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -658,7 +658,6 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, msm_submit_retire(submit); pm_runtime_mark_last_busy(&gpu->pdev->dev); - pm_runtime_put_autosuspend(&gpu->pdev->dev); spin_lock_irqsave(&ring->submit_lock, flags); list_del(&submit->node); @@ -672,6 +671,8 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, msm_devfreq_idle(gpu); mutex_unlock(&gpu->active_lock); + pm_runtime_put_autosuspend(&gpu->pdev->dev); + msm_gem_submit_put(submit); } From 54abcc525269f5041c1b2851f0be138503d8b43b Mon Sep 17 00:00:00 2001 From: Sergey Gorenko Date: Mon, 13 Jun 2022 15:38:54 +0300 Subject: [PATCH 0038/1056] scsi: iscsi: Exclude zero from the endpoint ID range [ Upstream commit f6eed15f3ea76596ccc689331e1cc850b999133b ] The kernel returns an endpoint ID as r.ep_connect_ret.handle in the iscsi_uevent. The iscsid validates a received endpoint ID and treats zero as an error. The commit referenced in the fixes line changed the endpoint ID range, and zero is always assigned to the first endpoint ID. So, the first attempt to create a new iSER connection always fails. Link: https://lore.kernel.org/r/20220613123854.55073-1-sergeygo@nvidia.com Fixes: 3c6ae371b8a1 ("scsi: iscsi: Release endpoint ID when its freed") Reviewed-by: Max Gurtovoy Reviewed-by: Mike Christie Reviewed-by: Lee Duncan Signed-off-by: Sergey Gorenko Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_transport_iscsi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index bcdfcb25349ad8..5947b9d5746e17 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -213,7 +213,12 @@ iscsi_create_endpoint(int dd_size) return NULL; mutex_lock(&iscsi_ep_idr_mutex); - id = idr_alloc(&iscsi_ep_idr, ep, 0, -1, GFP_NOIO); + + /* + * First endpoint id should be 1 to comply with user space + * applications (iscsid). + */ + id = idr_alloc(&iscsi_ep_idr, ep, 1, -1, GFP_NOIO); if (id < 0) { mutex_unlock(&iscsi_ep_idr_mutex); printk(KERN_ERR "Could not allocate endpoint ID. Error %d.\n", From 64e6ba7f2d2f022469be6c2a1c179851b94842dc Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Tue, 14 Jun 2022 07:07:46 +0000 Subject: [PATCH 0039/1056] xsk: Fix generic transmit when completion queue reservation fails [ Upstream commit a6e944f25cdbe6b82275402b8bc9a55ad7aac10b ] Two points of potential failure in the generic transmit function are: 1. completion queue (cq) reservation failure. 2. skb allocation failure Originally the cq reservation was performed first, followed by the skb allocation. Commit 675716400da6 ("xdp: fix possible cq entry leak") reversed the order because at the time there was no mechanism available to undo the cq reservation which could have led to possible cq entry leaks in the event of skb allocation failure. However if the skb allocation is performed first and the cq reservation then fails, the xsk skb destructor is called which blindly adds the skb address to the already full cq leading to undefined behavior. This commit restores the original order (cq reservation followed by skb allocation) and uses the xskq_prod_cancel helper to undo the cq reserve in event of skb allocation failure. Fixes: 675716400da6 ("xdp: fix possible cq entry leak") Signed-off-by: Ciara Loftus Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20220614070746.8871-1-ciara.loftus@intel.com Signed-off-by: Sasha Levin --- net/xdp/xsk.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 16cc38e51f14df..9b55ca27cccf2a 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -553,12 +553,6 @@ static int xsk_generic_xmit(struct sock *sk) goto out; } - skb = xsk_build_skb(xs, &desc); - if (IS_ERR(skb)) { - err = PTR_ERR(skb); - goto out; - } - /* This is the backpressure mechanism for the Tx path. * Reserve space in the completion queue and only proceed * if there is space in it. This avoids having to implement @@ -567,11 +561,19 @@ static int xsk_generic_xmit(struct sock *sk) spin_lock_irqsave(&xs->pool->cq_lock, flags); if (xskq_prod_reserve(xs->pool->cq)) { spin_unlock_irqrestore(&xs->pool->cq_lock, flags); - kfree_skb(skb); goto out; } spin_unlock_irqrestore(&xs->pool->cq_lock, flags); + skb = xsk_build_skb(xs, &desc); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + spin_lock_irqsave(&xs->pool->cq_lock, flags); + xskq_prod_cancel(xs->pool->cq); + spin_unlock_irqrestore(&xs->pool->cq_lock, flags); + goto out; + } + err = __dev_direct_xmit(skb, xs->queue_id); if (err == NETDEV_TX_BUSY) { /* Tell user-space to retry the send */ From 7154e4df56d45cf9025aa03cb34f52603ab441e7 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Mon, 13 Jun 2022 18:10:19 -0400 Subject: [PATCH 0040/1056] drm/msm: use for_each_sgtable_sg to iterate over scatterlist [ Upstream commit 62b5e322fb6cc5a5a91fdeba0e4e57e75d9f4387 ] The dma_map_sgtable() call (used to invalidate cache) overwrites sgt->nents with 1, so msm_iommu_pagetable_map maps only the first physical segment. To fix this problem use for_each_sgtable_sg(), which uses orig_nents. Fixes: b145c6e65eb0 ("drm/msm: Add support to create a local pagetable") Signed-off-by: Jonathan Marek Link: https://lore.kernel.org/r/20220613221019.11399-1-jonathan@marek.ca Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/msm_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index bcaddbba564df0..a54ed354578b53 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -58,7 +58,7 @@ static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova, u64 addr = iova; unsigned int i; - for_each_sg(sgt->sgl, sg, sgt->nents, i) { + for_each_sgtable_sg(sgt, sg, i) { size_t size = sg->length; phys_addr_t phys = sg_phys(sg); From b03607437ea81b850599f705096b05b85e7a4a71 Mon Sep 17 00:00:00 2001 From: Jon Maxwell Date: Wed, 15 Jun 2022 11:15:40 +1000 Subject: [PATCH 0041/1056] bpf: Fix request_sock leak in sk lookup helpers [ Upstream commit 3046a827316c0e55fc563b4fb78c93b9ca5c7c37 ] A customer reported a request_socket leak in a Calico cloud environment. We found that a BPF program was doing a socket lookup with takes a refcnt on the socket and that it was finding the request_socket but returning the parent LISTEN socket via sk_to_full_sk() without decrementing the child request socket 1st, resulting in request_sock slab object leak. This patch retains the existing behaviour of returning full socks to the caller but it also decrements the child request_socket if one is present before doing so to prevent the leak. Thanks to Curtis Taylor for all the help in diagnosing and testing this. And thanks to Antoine Tenart for the reproducer and patch input. v2 of this patch contains, refactor as per Daniel Borkmann's suggestions to validate RCU flags on the listen socket so that it balances with bpf_sk_release() and update comments as per Martin KaFai Lau's suggestion. One small change to Daniels suggestion, put "sk = sk2" under "if (sk2 != sk)" to avoid an extra instruction. Fixes: f7355a6c0497 ("bpf: Check sk_fullsock() before returning from bpf_sk_lookup()") Fixes: edbf8c01de5a ("bpf: add skc_lookup_tcp helper") Co-developed-by: Antoine Tenart Signed-off-by: Antoine Tenart Signed-off-by: Jon Maxwell Signed-off-by: Daniel Borkmann Tested-by: Curtis Taylor Cc: Martin KaFai Lau Link: https://lore.kernel.org/bpf/56d6f898-bde0-bb25-3427-12a330b29fb8@iogearbox.net Link: https://lore.kernel.org/bpf/20220615011540.813025-1-jmaxwell37@gmail.com Signed-off-by: Sasha Levin --- net/core/filter.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 0816468c545cd2..d1e2ef77ce4c15 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6209,10 +6209,21 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, ifindex, proto, netns_id, flags); if (sk) { - sk = sk_to_full_sk(sk); - if (!sk_fullsock(sk)) { + struct sock *sk2 = sk_to_full_sk(sk); + + /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk + * sock refcnt is decremented to prevent a request_sock leak. + */ + if (!sk_fullsock(sk2)) + sk2 = NULL; + if (sk2 != sk) { sock_gen_put(sk); - return NULL; + /* Ensure there is no need to bump sk2 refcnt */ + if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) { + WARN_ONCE(1, "Found non-RCU, unreferenced socket!"); + return NULL; + } + sk = sk2; } } @@ -6246,10 +6257,21 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, flags); if (sk) { - sk = sk_to_full_sk(sk); - if (!sk_fullsock(sk)) { + struct sock *sk2 = sk_to_full_sk(sk); + + /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk + * sock refcnt is decremented to prevent a request_sock leak. + */ + if (!sk_fullsock(sk2)) + sk2 = NULL; + if (sk2 != sk) { sock_gen_put(sk); - return NULL; + /* Ensure there is no need to bump sk2 refcnt */ + if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) { + WARN_ONCE(1, "Found non-RCU, unreferenced socket!"); + return NULL; + } + sk = sk2; } } From 0eef1dcb97744dc7db95b06e90b64be5b651fbf1 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 15 Jun 2022 00:42:53 -0500 Subject: [PATCH 0042/1056] drm/sun4i: Fix crash during suspend after component bind failure [ Upstream commit 1342b5b23da9559a1578978eaff7f797d8a87d91 ] If the component driver fails to bind, or is unbound, the driver data for the top-level platform device points to a freed drm_device. If the system is then suspended, the driver passes this dangling pointer to drm_mode_config_helper_suspend(), which crashes. Fix this by only setting the driver data while the platform driver holds a reference to the drm_device. Fixes: 624b4b48d9d8 ("drm: sun4i: Add support for suspending the display driver") Signed-off-by: Samuel Holland Reviewed-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220615054254.16352-1-samuel@sholland.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun4i_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 54dd562e294ce3..5b7061e2bca4d2 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -72,7 +72,6 @@ static int sun4i_drv_bind(struct device *dev) goto free_drm; } - dev_set_drvdata(dev, drm); drm->dev_private = drv; INIT_LIST_HEAD(&drv->frontend_list); INIT_LIST_HEAD(&drv->engine_list); @@ -113,6 +112,8 @@ static int sun4i_drv_bind(struct device *dev) drm_fbdev_generic_setup(drm, 32); + dev_set_drvdata(dev, drm); + return 0; finish_poll: @@ -129,6 +130,7 @@ static void sun4i_drv_unbind(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); + dev_set_drvdata(dev, NULL); drm_dev_unregister(drm); drm_kms_helper_poll_fini(drm); drm_atomic_helper_shutdown(drm); From b28e4e3fd34c97ef282ad77f9b398d01a82bc6b3 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 16 Jun 2022 18:20:36 +0200 Subject: [PATCH 0043/1056] bpf, x86: Fix tail call count offset calculation on bpf2bpf call [ Upstream commit ff672c67ee7635ca1e28fb13729e8ef0d1f08ce5 ] On x86-64 the tail call count is passed from one BPF function to another through %rax. Additionally, on function entry, the tail call count value is stored on stack right after the BPF program stack, due to register shortage. The stored count is later loaded from stack either when performing a tail call - to check if we have not reached the tail call limit - or before calling another BPF function call in order to pass it via %rax. In the latter case, we miscalculate the offset at which the tail call count was stored on function entry. The JIT does not take into account that the allocated BPF program stack is always a multiple of 8 on x86, while the actual stack depth does not have to be. This leads to a load from an offset that belongs to the BPF stack, as shown in the example below: SEC("tc") int entry(struct __sk_buff *skb) { /* Have data on stack which size is not a multiple of 8 */ volatile char arr[1] = {}; return subprog_tail(skb); } int entry(struct __sk_buff * skb): 0: (b4) w2 = 0 1: (73) *(u8 *)(r10 -1) = r2 2: (85) call pc+1#bpf_prog_ce2f79bb5f3e06dd_F 3: (95) exit int entry(struct __sk_buff * skb): 0xffffffffa0201788: nop DWORD PTR [rax+rax*1+0x0] 0xffffffffa020178d: xor eax,eax 0xffffffffa020178f: push rbp 0xffffffffa0201790: mov rbp,rsp 0xffffffffa0201793: sub rsp,0x8 0xffffffffa020179a: push rax 0xffffffffa020179b: xor esi,esi 0xffffffffa020179d: mov BYTE PTR [rbp-0x1],sil 0xffffffffa02017a1: mov rax,QWORD PTR [rbp-0x9] !!! tail call count 0xffffffffa02017a8: call 0xffffffffa02017d8 !!! is at rbp-0x10 0xffffffffa02017ad: leave 0xffffffffa02017ae: ret Fix it by rounding up the BPF stack depth to a multiple of 8, when calculating the tail call count offset on stack. Fixes: ebf7d1f508a7 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT") Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: Maciej Fijalkowski Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220616162037.535469-2-jakub@cloudflare.com Signed-off-by: Sasha Levin --- arch/x86/net/bpf_jit_comp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d99434dc215c28..8dca2bcbb0eae3 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1440,8 +1440,9 @@ st: if (is_imm8(insn->off)) case BPF_JMP | BPF_CALL: func = (u8 *) __bpf_call_base + imm32; if (tail_call_reachable) { + /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */ EMIT3_off32(0x48, 0x8B, 0x85, - -(bpf_prog->aux->stack_depth + 8)); + -round_up(bpf_prog->aux->stack_depth, 8) - 8); if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7)) return -EINVAL; } else { From 8e74f5ceea52fb76fe77ab5f22b8e667a93518aa Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Tue, 14 Jun 2022 00:05:55 -0700 Subject: [PATCH 0044/1056] scsi: storvsc: Correct reporting of Hyper-V I/O size limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1d3e0980782fbafaf93285779fd3905e4f866802 ] Current code is based on the idea that the max number of SGL entries also determines the max size of an I/O request. While this idea was true in older versions of the storvsc driver when SGL entry length was limited to 4 Kbytes, commit 3d9c3dcc58e9 ("scsi: storvsc: Enable scatterlist entry lengths > 4Kbytes") removed that limitation. It's now theoretically possible for the block layer to send requests that exceed the maximum size supported by Hyper-V. This problem doesn't currently happen in practice because the block layer defaults to a 512 Kbyte maximum, while Hyper-V in Azure supports 2 Mbyte I/O sizes. But some future configuration of Hyper-V could have a smaller max I/O size, and the block layer could exceed that max. Fix this by correctly setting max_sectors as well as sg_tablesize to reflect the maximum I/O size that Hyper-V reports. While allowing I/O sizes larger than the block layer default of 512 Kbytes doesn’t provide any noticeable performance benefit in the tests we ran, it's still appropriate to report the correct underlying Hyper-V capabilities to the Linux block layer. Also tweak the virt_boundary_mask to reflect that the required alignment derives from Hyper-V communication using a 4 Kbyte page size, and not on the guest page size, which might be bigger (eg. ARM64). Link: https://lore.kernel.org/r/1655190355-28722-1-git-send-email-ssengar@linux.microsoft.com Fixes: 3d9c3dcc58e9 ("scsi: storvsc: Enable scatter list entry lengths > 4Kbytes") Reviewed-by: Michael Kelley Signed-off-by: Saurabh Sengar Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/storvsc_drv.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 9eb1b88a29dde3..71c7f7b435c4af 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1907,7 +1907,7 @@ static struct scsi_host_template scsi_driver = { .cmd_per_lun = 2048, .this_id = -1, /* Ensure there are no gaps in presented sgls */ - .virt_boundary_mask = PAGE_SIZE-1, + .virt_boundary_mask = HV_HYP_PAGE_SIZE - 1, .no_write_same = 1, .track_queue_depth = 1, .change_queue_depth = storvsc_change_queue_depth, @@ -1961,6 +1961,7 @@ static int storvsc_probe(struct hv_device *device, int max_targets; int max_channels; int max_sub_channels = 0; + u32 max_xfer_bytes; /* * Based on the windows host we are running on, @@ -2049,12 +2050,28 @@ static int storvsc_probe(struct hv_device *device, } /* max cmd length */ host->max_cmd_len = STORVSC_MAX_CMD_LEN; - /* - * set the table size based on the info we got - * from the host. + * Any reasonable Hyper-V configuration should provide + * max_transfer_bytes value aligning to HV_HYP_PAGE_SIZE, + * protecting it from any weird value. + */ + max_xfer_bytes = round_down(stor_device->max_transfer_bytes, HV_HYP_PAGE_SIZE); + /* max_hw_sectors_kb */ + host->max_sectors = max_xfer_bytes >> 9; + /* + * There are 2 requirements for Hyper-V storvsc sgl segments, + * based on which the below calculation for max segments is + * done: + * + * 1. Except for the first and last sgl segment, all sgl segments + * should be align to HV_HYP_PAGE_SIZE, that also means the + * maximum number of segments in a sgl can be calculated by + * dividing the total max transfer length by HV_HYP_PAGE_SIZE. + * + * 2. Except for the first and last, each entry in the SGL must + * have an offset that is a multiple of HV_HYP_PAGE_SIZE. */ - host->sg_tablesize = (stor_device->max_transfer_bytes >> PAGE_SHIFT); + host->sg_tablesize = (max_xfer_bytes >> HV_HYP_PAGE_SHIFT) + 1; /* * For non-IDE disks, the host supports multiple channels. * Set the number of HW queues we are supporting. From d7fe6be43cfaaa10a9cc48a5baf202c387caeefd Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 10 Jun 2022 11:40:37 +0300 Subject: [PATCH 0045/1056] phy: aquantia: Fix AN when higher speeds than 1G are not advertised [ Upstream commit 9b7fd1670a94a57d974795acebde843a5c1a354e ] Even when the eth port is resticted to work with speeds not higher than 1G, and so the eth driver is requesting the phy (via phylink) to advertise up to 1000BASET support, the aquantia phy device is still advertising for 2.5G and 5G speeds. Clear these advertising defaults when requested. Cc: Ondrej Spacek Fixes: 09c4c57f7bc41 ("net: phy: aquantia: add support for auto-negotiation configuration") Signed-off-by: Claudiu Manoil Link: https://lore.kernel.org/r/20220610084037.7625-1-claudiu.manoil@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/aquantia_main.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c index 968dd43a2b1e00..3221224525ac9f 100644 --- a/drivers/net/phy/aquantia_main.c +++ b/drivers/net/phy/aquantia_main.c @@ -34,6 +34,8 @@ #define MDIO_AN_VEND_PROV 0xc400 #define MDIO_AN_VEND_PROV_1000BASET_FULL BIT(15) #define MDIO_AN_VEND_PROV_1000BASET_HALF BIT(14) +#define MDIO_AN_VEND_PROV_5000BASET_FULL BIT(11) +#define MDIO_AN_VEND_PROV_2500BASET_FULL BIT(10) #define MDIO_AN_VEND_PROV_DOWNSHIFT_EN BIT(4) #define MDIO_AN_VEND_PROV_DOWNSHIFT_MASK GENMASK(3, 0) #define MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT 4 @@ -231,9 +233,20 @@ static int aqr_config_aneg(struct phy_device *phydev) phydev->advertising)) reg |= MDIO_AN_VEND_PROV_1000BASET_HALF; + /* Handle the case when the 2.5G and 5G speeds are not advertised */ + if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + phydev->advertising)) + reg |= MDIO_AN_VEND_PROV_2500BASET_FULL; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + phydev->advertising)) + reg |= MDIO_AN_VEND_PROV_5000BASET_FULL; + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_VEND_PROV, MDIO_AN_VEND_PROV_1000BASET_HALF | - MDIO_AN_VEND_PROV_1000BASET_FULL, reg); + MDIO_AN_VEND_PROV_1000BASET_FULL | + MDIO_AN_VEND_PROV_2500BASET_FULL | + MDIO_AN_VEND_PROV_5000BASET_FULL, reg); if (ret < 0) return ret; if (ret > 0) From 638be56ae9cc5c25a81c525df20b1b9d92b08613 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 16 Jun 2022 16:11:34 +0000 Subject: [PATCH 0046/1056] KVM: arm64: Prevent kmemleak from accessing pKVM memory [ Upstream commit 56961c6331463cce2d84d0f973177a517fb33a82 ] Commit a7259df76702 ("memblock: make memblock_find_in_range method private") changed the API using which memory is reserved for the pKVM hypervisor. However, memblock_phys_alloc() differs from the original API in terms of kmemleak semantics -- the old one didn't report the reserved regions to kmemleak while the new one does. Unfortunately, when protected KVM is enabled, all kernel accesses to pKVM-private memory result in a fatal exception, which can now happen because of kmemleak scans: $ echo scan > /sys/kernel/debug/kmemleak [ 34.991354] kvm [304]: nVHE hyp BUG at: [] __kvm_nvhe_handle_host_mem_abort+0x270/0x290! [ 34.991580] kvm [304]: Hyp Offset: 0xfffe8be807e00000 [ 34.991813] Kernel panic - not syncing: HYP panic: [ 34.991813] PS:600003c9 PC:0000f418011a3750 ESR:00000000f2000800 [ 34.991813] FAR:ffff000439200000 HPFAR:0000000004792000 PAR:0000000000000000 [ 34.991813] VCPU:0000000000000000 [ 34.993660] CPU: 0 PID: 304 Comm: bash Not tainted 5.19.0-rc2 #102 [ 34.994059] Hardware name: linux,dummy-virt (DT) [ 34.994452] Call trace: [ 34.994641] dump_backtrace.part.0+0xcc/0xe0 [ 34.994932] show_stack+0x18/0x6c [ 34.995094] dump_stack_lvl+0x68/0x84 [ 34.995276] dump_stack+0x18/0x34 [ 34.995484] panic+0x16c/0x354 [ 34.995673] __hyp_pgtable_total_pages+0x0/0x60 [ 34.995933] scan_block+0x74/0x12c [ 34.996129] scan_gray_list+0xd8/0x19c [ 34.996332] kmemleak_scan+0x2c8/0x580 [ 34.996535] kmemleak_write+0x340/0x4a0 [ 34.996744] full_proxy_write+0x60/0xbc [ 34.996967] vfs_write+0xc4/0x2b0 [ 34.997136] ksys_write+0x68/0xf4 [ 34.997311] __arm64_sys_write+0x20/0x2c [ 34.997532] invoke_syscall+0x48/0x114 [ 34.997779] el0_svc_common.constprop.0+0x44/0xec [ 34.998029] do_el0_svc+0x2c/0xc0 [ 34.998205] el0_svc+0x2c/0x84 [ 34.998421] el0t_64_sync_handler+0xf4/0x100 [ 34.998653] el0t_64_sync+0x18c/0x190 [ 34.999252] SMP: stopping secondary CPUs [ 35.000034] Kernel Offset: disabled [ 35.000261] CPU features: 0x800,00007831,00001086 [ 35.000642] Memory Limit: none [ 35.001329] ---[ end Kernel panic - not syncing: HYP panic: [ 35.001329] PS:600003c9 PC:0000f418011a3750 ESR:00000000f2000800 [ 35.001329] FAR:ffff000439200000 HPFAR:0000000004792000 PAR:0000000000000000 [ 35.001329] VCPU:0000000000000000 ]--- Fix this by explicitly excluding the hypervisor's memory pool from kmemleak like we already do for the hyp BSS. Cc: Mike Rapoport Fixes: a7259df76702 ("memblock: make memblock_find_in_range method private") Signed-off-by: Quentin Perret Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220616161135.3997786-1-qperret@google.com Signed-off-by: Sasha Levin --- arch/arm64/kvm/arm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a30c036577a323..f181527f9d4363 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2011,11 +2011,11 @@ static int finalize_hyp_mode(void) return 0; /* - * Exclude HYP BSS from kmemleak so that it doesn't get peeked - * at, which would end badly once the section is inaccessible. - * None of other sections should ever be introspected. + * Exclude HYP sections from kmemleak so that they don't get peeked + * at, which would end badly once inaccessible. */ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); + kmemleak_free_part(__va(hyp_mem_base), hyp_mem_size); return pkvm_drop_host_privileges(); } From f617cef465523e453ea3df4e87cb8e02c3efbace Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 26 Nov 2021 17:15:29 +0100 Subject: [PATCH 0047/1056] net: Write lock dev_base_lock without disabling bottom halves. [ Upstream commit fd888e85fe6b661e78044dddfec0be5271afa626 ] The writer acquires dev_base_lock with disabled bottom halves. The reader can acquire dev_base_lock without disabling bottom halves because there is no writer in softirq context. On PREEMPT_RT the softirqs are preemptible and local_bh_disable() acts as a lock to ensure that resources, that are protected by disabling bottom halves, remain protected. This leads to a circular locking dependency if the lock acquired with disabled bottom halves (as in write_lock_bh()) and somewhere else with enabled bottom halves (as by read_lock() in netstat_show()) followed by disabling bottom halves (cxgb_get_stats() -> t4_wr_mbox_meat_timeout() -> spin_lock_bh()). This is the reverse locking order. All read_lock() invocation are from sysfs callback which are not invoked from softirq context. Therefore there is no need to disable bottom halves while acquiring a write lock. Acquire the write lock of dev_base_lock without disabling bottom halves. Reported-by: Pei Zhang Reported-by: Luis Claudio R. Goncalves Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/dev.c | 16 ++++++++-------- net/core/link_watch.c | 4 ++-- net/core/rtnetlink.c | 8 ++++---- net/hsr/hsr_device.c | 6 +++--- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index b9731b267d0736..860fc6a98373a3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -365,12 +365,12 @@ static void list_netdevice(struct net_device *dev) ASSERT_RTNL(); - write_lock_bh(&dev_base_lock); + write_lock(&dev_base_lock); list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); netdev_name_node_add(net, dev->name_node); hlist_add_head_rcu(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); - write_unlock_bh(&dev_base_lock); + write_unlock(&dev_base_lock); dev_base_seq_inc(net); } @@ -383,11 +383,11 @@ static void unlist_netdevice(struct net_device *dev) ASSERT_RTNL(); /* Unlink dev from the device chain */ - write_lock_bh(&dev_base_lock); + write_lock(&dev_base_lock); list_del_rcu(&dev->dev_list); netdev_name_node_del(dev->name_node); hlist_del_rcu(&dev->index_hlist); - write_unlock_bh(&dev_base_lock); + write_unlock(&dev_base_lock); dev_base_seq_inc(dev_net(dev)); } @@ -1266,15 +1266,15 @@ int dev_change_name(struct net_device *dev, const char *newname) netdev_adjacent_rename_links(dev, oldname); - write_lock_bh(&dev_base_lock); + write_lock(&dev_base_lock); netdev_name_node_del(dev->name_node); - write_unlock_bh(&dev_base_lock); + write_unlock(&dev_base_lock); synchronize_rcu(); - write_lock_bh(&dev_base_lock); + write_lock(&dev_base_lock); netdev_name_node_add(net, dev->name_node); - write_unlock_bh(&dev_base_lock); + write_unlock(&dev_base_lock); ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); ret = notifier_to_errno(ret); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 1a455847da54fc..9599afd0862dab 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -55,7 +55,7 @@ static void rfc2863_policy(struct net_device *dev) if (operstate == dev->operstate) return; - write_lock_bh(&dev_base_lock); + write_lock(&dev_base_lock); switch(dev->link_mode) { case IF_LINK_MODE_TESTING: @@ -74,7 +74,7 @@ static void rfc2863_policy(struct net_device *dev) dev->operstate = operstate; - write_unlock_bh(&dev_base_lock); + write_unlock(&dev_base_lock); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9c0e8ccf9bc58e..8c85e93daa739b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -842,9 +842,9 @@ static void set_operstate(struct net_device *dev, unsigned char transition) } if (dev->operstate != operstate) { - write_lock_bh(&dev_base_lock); + write_lock(&dev_base_lock); dev->operstate = operstate; - write_unlock_bh(&dev_base_lock); + write_unlock(&dev_base_lock); netdev_state_change(dev); } } @@ -2781,11 +2781,11 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_LINKMODE]) { unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]); - write_lock_bh(&dev_base_lock); + write_lock(&dev_base_lock); if (dev->link_mode ^ value) status |= DO_SETLINK_NOTIFY; dev->link_mode = value; - write_unlock_bh(&dev_base_lock); + write_unlock(&dev_base_lock); } if (tb[IFLA_VFINFO_LIST]) { diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 26c32407f02909..ea7b96e296ef07 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -30,13 +30,13 @@ static bool is_slave_up(struct net_device *dev) static void __hsr_set_operstate(struct net_device *dev, int transition) { - write_lock_bh(&dev_base_lock); + write_lock(&dev_base_lock); if (dev->operstate != transition) { dev->operstate = transition; - write_unlock_bh(&dev_base_lock); + write_unlock(&dev_base_lock); netdev_state_change(dev); } else { - write_unlock_bh(&dev_base_lock); + write_unlock(&dev_base_lock); } } From ad10d61c55aaa6e4ce50099d2e85cde4b19f1ac8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Jun 2022 00:34:34 -0700 Subject: [PATCH 0048/1056] net: fix data-race in dev_isalive() [ Upstream commit cc26c2661fefea215f41edb665193324a5f99021 ] dev_isalive() is called under RTNL or dev_base_lock protection. This means that changes to dev->reg_state should be done with both locks held. syzbot reported: BUG: KCSAN: data-race in register_netdevice / type_show write to 0xffff888144ecf518 of 1 bytes by task 20886 on cpu 0: register_netdevice+0xb9f/0xdf0 net/core/dev.c:10050 lapbeth_new_device drivers/net/wan/lapbether.c:414 [inline] lapbeth_device_event+0x4a0/0x6c0 drivers/net/wan/lapbether.c:456 notifier_call_chain kernel/notifier.c:87 [inline] raw_notifier_call_chain+0x53/0xb0 kernel/notifier.c:455 __dev_notify_flags+0x1d6/0x3a0 dev_change_flags+0xa2/0xc0 net/core/dev.c:8607 do_setlink+0x778/0x2230 net/core/rtnetlink.c:2780 __rtnl_newlink net/core/rtnetlink.c:3546 [inline] rtnl_newlink+0x114c/0x16a0 net/core/rtnetlink.c:3593 rtnetlink_rcv_msg+0x811/0x8c0 net/core/rtnetlink.c:6089 netlink_rcv_skb+0x13e/0x240 net/netlink/af_netlink.c:2501 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:6107 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x58a/0x660 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x661/0x750 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] __sys_sendto+0x21e/0x2c0 net/socket.c:2119 __do_sys_sendto net/socket.c:2131 [inline] __se_sys_sendto net/socket.c:2127 [inline] __x64_sys_sendto+0x74/0x90 net/socket.c:2127 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 read to 0xffff888144ecf518 of 1 bytes by task 20423 on cpu 1: dev_isalive net/core/net-sysfs.c:38 [inline] netdev_show net/core/net-sysfs.c:50 [inline] type_show+0x24/0x90 net/core/net-sysfs.c:112 dev_attr_show+0x35/0x90 drivers/base/core.c:2095 sysfs_kf_seq_show+0x175/0x240 fs/sysfs/file.c:59 kernfs_seq_show+0x75/0x80 fs/kernfs/file.c:162 seq_read_iter+0x2c3/0x8e0 fs/seq_file.c:230 kernfs_fop_read_iter+0xd1/0x2f0 fs/kernfs/file.c:235 call_read_iter include/linux/fs.h:2052 [inline] new_sync_read fs/read_write.c:401 [inline] vfs_read+0x5a5/0x6a0 fs/read_write.c:482 ksys_read+0xe8/0x1a0 fs/read_write.c:620 __do_sys_read fs/read_write.c:630 [inline] __se_sys_read fs/read_write.c:628 [inline] __x64_sys_read+0x3e/0x50 fs/read_write.c:628 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 value changed: 0x00 -> 0x01 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 20423 Comm: udevd Tainted: G W 5.19.0-rc2-syzkaller-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/dev.c | 28 ++++++++++++++++------------ net/core/net-sysfs.c | 1 + 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 860fc6a98373a3..6111506a41053a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -378,16 +378,18 @@ static void list_netdevice(struct net_device *dev) /* Device list removal * caller must respect a RCU grace period before freeing/reusing dev */ -static void unlist_netdevice(struct net_device *dev) +static void unlist_netdevice(struct net_device *dev, bool lock) { ASSERT_RTNL(); /* Unlink dev from the device chain */ - write_lock(&dev_base_lock); + if (lock) + write_lock(&dev_base_lock); list_del_rcu(&dev->dev_list); netdev_name_node_del(dev->name_node); hlist_del_rcu(&dev->index_hlist); - write_unlock(&dev_base_lock); + if (lock) + write_unlock(&dev_base_lock); dev_base_seq_inc(dev_net(dev)); } @@ -10319,11 +10321,11 @@ int register_netdevice(struct net_device *dev) goto err_uninit; ret = netdev_register_kobject(dev); - if (ret) { - dev->reg_state = NETREG_UNREGISTERED; + write_lock(&dev_base_lock); + dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED; + write_unlock(&dev_base_lock); + if (ret) goto err_uninit; - } - dev->reg_state = NETREG_REGISTERED; __netdev_update_features(dev); @@ -10483,8 +10485,6 @@ static void netdev_wait_allrefs(struct net_device *dev) unsigned long rebroadcast_time, warning_time; int wait = 0, refcnt; - linkwatch_forget_dev(dev); - rebroadcast_time = warning_time = jiffies; refcnt = netdev_refcnt_read(dev); @@ -10598,7 +10598,10 @@ void netdev_run_todo(void) continue; } + write_lock(&dev_base_lock); dev->reg_state = NETREG_UNREGISTERED; + write_unlock(&dev_base_lock); + linkwatch_forget_dev(dev); netdev_wait_allrefs(dev); @@ -11043,9 +11046,10 @@ void unregister_netdevice_many(struct list_head *head) list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ - unlist_netdevice(dev); - + write_lock(&dev_base_lock); + unlist_netdevice(dev, false); dev->reg_state = NETREG_UNREGISTERING; + write_unlock(&dev_base_lock); } flush_all_backlogs(); @@ -11190,7 +11194,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, dev_close(dev); /* And unlink it from device chain */ - unlist_netdevice(dev); + unlist_netdevice(dev, true); synchronize_net(); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 9e5657f6324539..e9ea0695efb420 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -32,6 +32,7 @@ static const char fmt_dec[] = "%d\n"; static const char fmt_ulong[] = "%lu\n"; static const char fmt_u64[] = "%llu\n"; +/* Caller holds RTNL or dev_base_lock */ static inline int dev_isalive(const struct net_device *dev) { return dev->reg_state <= NETREG_REGISTERED; From cd7789e659e84f137631dc1f5ec8d794f2700e6c Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Fri, 17 Jun 2022 08:45:51 +0700 Subject: [PATCH 0049/1056] tipc: fix use-after-free Read in tipc_named_reinit [ Upstream commit 911600bf5a5e84bfda4d33ee32acc75ecf6159f0 ] syzbot found the following issue on: ================================================================== BUG: KASAN: use-after-free in tipc_named_reinit+0x94f/0x9b0 net/tipc/name_distr.c:413 Read of size 8 at addr ffff88805299a000 by task kworker/1:9/23764 CPU: 1 PID: 23764 Comm: kworker/1:9 Not tainted 5.18.0-rc4-syzkaller-00878-g17d49e6e8012 #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events tipc_net_finalize_work Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0xeb/0x495 mm/kasan/report.c:313 print_report mm/kasan/report.c:429 [inline] kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491 tipc_named_reinit+0x94f/0x9b0 net/tipc/name_distr.c:413 tipc_net_finalize+0x234/0x3d0 net/tipc/net.c:138 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298 [...] ================================================================== In the commit d966ddcc3821 ("tipc: fix a deadlock when flushing scheduled work"), the cancel_work_sync() function just to make sure ONLY the work tipc_net_finalize_work() is executing/pending on any CPU completed before tipc namespace is destroyed through tipc_exit_net(). But this function is not guaranteed the work is the last queued. So, the destroyed instance may be accessed in the work which will try to enqueue later. In order to completely fix, we re-order the calling of cancel_work_sync() to make sure the work tipc_net_finalize_work() was last queued and it must be completed by calling cancel_work_sync(). Reported-by: syzbot+47af19f3307fc9c5c82e@syzkaller.appspotmail.com Fixes: d966ddcc3821 ("tipc: fix a deadlock when flushing scheduled work") Acked-by: Jon Maloy Signed-off-by: Ying Xue Signed-off-by: Hoang Le Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 3f4542e0f0650b..434e70eabe0812 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -109,10 +109,9 @@ static void __net_exit tipc_exit_net(struct net *net) struct tipc_net *tn = tipc_net(net); tipc_detach_loopback(net); + tipc_net_stop(net); /* Make sure the tipc_net_finalize_work() finished */ cancel_work_sync(&tn->work); - tipc_net_stop(net); - tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); From 2af944210dc23d43d8208dafac4df7be7e3c168b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 16 Jun 2022 16:13:20 +0200 Subject: [PATCH 0050/1056] igb: fix a use-after-free issue in igb_clean_tx_ring [ Upstream commit 3f6a57ee8544ec3982f8a3cbcbf4aea7d47eb9ec ] Fix the following use-after-free bug in igb_clean_tx_ring routine when the NIC is running in XDP mode. The issue can be triggered redirecting traffic into the igb NIC and then closing the device while the traffic is flowing. [ 73.322719] CPU: 1 PID: 487 Comm: xdp_redirect Not tainted 5.18.3-apu2 #9 [ 73.330639] Hardware name: PC Engines APU2/APU2, BIOS 4.0.7 02/28/2017 [ 73.337434] RIP: 0010:refcount_warn_saturate+0xa7/0xf0 [ 73.362283] RSP: 0018:ffffc9000081f798 EFLAGS: 00010282 [ 73.367761] RAX: 0000000000000000 RBX: ffffc90000420f80 RCX: 0000000000000000 [ 73.375200] RDX: ffff88811ad22d00 RSI: ffff88811ad171e0 RDI: ffff88811ad171e0 [ 73.382590] RBP: 0000000000000900 R08: ffffffff82298f28 R09: 0000000000000058 [ 73.390008] R10: 0000000000000219 R11: ffffffff82280f40 R12: 0000000000000090 [ 73.397356] R13: ffff888102343a40 R14: ffff88810359e0e4 R15: 0000000000000000 [ 73.404806] FS: 00007ff38d31d740(0000) GS:ffff88811ad00000(0000) knlGS:0000000000000000 [ 73.413129] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 73.419096] CR2: 000055cff35f13f8 CR3: 0000000106391000 CR4: 00000000000406e0 [ 73.426565] Call Trace: [ 73.429087] [ 73.431314] igb_clean_tx_ring+0x43/0x140 [igb] [ 73.436002] igb_down+0x1d7/0x220 [igb] [ 73.439974] __igb_close+0x3c/0x120 [igb] [ 73.444118] igb_xdp+0x10c/0x150 [igb] [ 73.447983] ? igb_pci_sriov_configure+0x70/0x70 [igb] [ 73.453362] dev_xdp_install+0xda/0x110 [ 73.457371] dev_xdp_attach+0x1da/0x550 [ 73.461369] do_setlink+0xfd0/0x10f0 [ 73.465166] ? __nla_validate_parse+0x89/0xc70 [ 73.469714] rtnl_setlink+0x11a/0x1e0 [ 73.473547] rtnetlink_rcv_msg+0x145/0x3d0 [ 73.477709] ? rtnl_calcit.isra.0+0x130/0x130 [ 73.482258] netlink_rcv_skb+0x8d/0x110 [ 73.486229] netlink_unicast+0x230/0x340 [ 73.490317] netlink_sendmsg+0x215/0x470 [ 73.494395] __sys_sendto+0x179/0x190 [ 73.498268] ? move_addr_to_user+0x37/0x70 [ 73.502547] ? __sys_getsockname+0x84/0xe0 [ 73.506853] ? netlink_setsockopt+0x1c1/0x4a0 [ 73.511349] ? __sys_setsockopt+0xc8/0x1d0 [ 73.515636] __x64_sys_sendto+0x20/0x30 [ 73.519603] do_syscall_64+0x3b/0x80 [ 73.523399] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 73.528712] RIP: 0033:0x7ff38d41f20c [ 73.551866] RSP: 002b:00007fff3b945a68 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 73.559640] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007ff38d41f20c [ 73.567066] RDX: 0000000000000034 RSI: 00007fff3b945b30 RDI: 0000000000000003 [ 73.574457] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000 [ 73.581852] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fff3b945ab0 [ 73.589179] R13: 0000000000000000 R14: 0000000000000003 R15: 00007fff3b945b30 [ 73.596545] [ 73.598842] ---[ end trace 0000000000000000 ]--- Fixes: 9cbc948b5a20c ("igb: add XDP support") Signed-off-by: Lorenzo Bianconi Reviewed-by: Jesse Brandeburg Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/e5c01d549dc37bff18e46aeabd6fb28a7bcf84be.1655388571.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/igb_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index b8830335148439..5ee5ee8e684826 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4819,8 +4819,11 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring) while (i != tx_ring->next_to_use) { union e1000_adv_tx_desc *eop_desc, *tx_desc; - /* Free all the Tx ring sk_buffs */ - dev_kfree_skb_any(tx_buffer->skb); + /* Free all the Tx ring sk_buffs or xdp frames */ + if (tx_buffer->type == IGB_TYPE_SKB) + dev_kfree_skb_any(tx_buffer->skb); + else + xdp_return_frame(tx_buffer->xdpf); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, From 6386fdde8df05e974e468d392c3684c790d75a4c Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Thu, 16 Jun 2022 12:32:40 -0700 Subject: [PATCH 0051/1056] bonding: ARP monitor spams NETDEV_NOTIFY_PEERS notifiers [ Upstream commit 7a9214f3d88cfdb099f3896e102a306b316d8707 ] The bonding ARP monitor fails to decrement send_peer_notif, the number of peer notifications (gratuitous ARP or ND) to be sent. This results in a continuous series of notifications. Correct this by decrementing the counter for each notification. Reported-by: Jonathan Toppins Signed-off-by: Jay Vosburgh Fixes: b0929915e035 ("bonding: Fix RTNL: assertion failed at net/core/rtnetlink.c for ab arp monitor") Link: https://lore.kernel.org/netdev/b2fd4147-8f50-bebd-963a-1a3e8d1d9715@redhat.com/ Tested-by: Jonathan Toppins Reviewed-by: Jonathan Toppins Link: https://lore.kernel.org/r/9400.1655407960@famine Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2e75b7e8f70b3d..cd0d7b24f01400 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3474,9 +3474,11 @@ static void bond_activebackup_arp_mon(struct bonding *bond) if (!rtnl_trylock()) return; - if (should_notify_peers) + if (should_notify_peers) { + bond->send_peer_notif--; call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); + } if (should_notify_rtnl) { bond_slave_state_notify(bond); bond_slave_link_notify(bond); From efb10d2a047d115ec953187e1ccfd30b002473a2 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 16 Jun 2022 18:08:55 +0200 Subject: [PATCH 0052/1056] ethtool: Fix get module eeprom fallback [ Upstream commit a3bb7b63813f674fb62bac321cdd897cc62de094 ] Function fallback_set_params() checks if the module type returned by a driver is ETH_MODULE_SFF_8079 and in this case it assumes that buffer returns a concatenated content of page A0h and A2h. The check is wrong because the correct type is ETH_MODULE_SFF_8472. Fixes: 96d971e307cc ("ethtool: Add fallback to get_module_eeprom from netlink command") Signed-off-by: Ivan Vecera Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/20220616160856.3623273-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ethtool/eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index 7e6b37a54add35..1c94bb8ea03f23 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -36,7 +36,7 @@ static int fallback_set_params(struct eeprom_req_info *request, if (request->page) offset = request->page * ETH_MODULE_EEPROM_PAGE_LEN + offset; - if (modinfo->type == ETH_MODULE_SFF_8079 && + if (modinfo->type == ETH_MODULE_SFF_8472 && request->i2c_address == 0x51) offset += ETH_MODULE_EEPROM_PAGE_LEN * 2; From 45bd293bbcd8ce2d37579d823e25a56043f5f1a6 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Thu, 16 Jun 2022 16:43:36 -0700 Subject: [PATCH 0053/1056] net/sched: sch_netem: Fix arithmetic in netem_dump() for 32-bit platforms [ Upstream commit a2b1a5d40bd12b44322c2ccd40bb0ec1699708b6 ] As reported by Yuming, currently tc always show a latency of UINT_MAX for netem Qdisc's on 32-bit platforms: $ tc qdisc add dev dummy0 root netem latency 100ms $ tc qdisc show dev dummy0 qdisc netem 8001: root refcnt 2 limit 1000 delay 275s 275s ^^^^^^^^^^^^^^^^ Let us take a closer look at netem_dump(): qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency, UINT_MAX); qopt.latency is __u32, psched_tdiff_t is signed long, (psched_tdiff_t)(UINT_MAX) is negative for 32-bit platforms, so qopt.latency is always UINT_MAX. Fix it by using psched_time_t (u64) instead. Note: confusingly, users have two ways to specify 'latency': 1. normally, via '__u32 latency' in struct tc_netem_qopt; 2. via the TCA_NETEM_LATENCY64 attribute, which is s64. For the second case, theoretically 'latency' could be negative. This patch ignores that corner case, since it is broken (i.e. assigning a negative s64 to __u32) anyways, and should be handled separately. Thanks Ted Lin for the analysis [1] . [1] https://github.com/raspberrypi/linux/issues/3512 Reported-by: Yuming Chen Fixes: 112f9cb65643 ("netem: convert to qdisc_watchdog_schedule_ns") Reviewed-by: Cong Wang Signed-off-by: Peilin Ye Acked-by: Stephen Hemminger Link: https://lore.kernel.org/r/20220616234336.2443-1-yepeilin.cs@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_netem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 0c345e43a09a37..adc5407fd5d589 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1146,9 +1146,9 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_netem_rate rate; struct tc_netem_slot slot; - qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency), + qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency), UINT_MAX); - qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter), + qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter), UINT_MAX); qopt.limit = q->limit; qopt.loss = q->loss; From 3c39a17197733bc37786ed68c83267c2f491840b Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 7 Jun 2022 15:08:38 +0400 Subject: [PATCH 0054/1056] drm/msm/mdp4: Fix refcount leak in mdp4_modeset_init_intf [ Upstream commit b9cc4598607cb7f7eae5c75fc1e3209cd52ff5e0 ] of_graph_get_remote_node() returns remote device node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. Add missing of_node_put() to avoid refcount leak. Fixes: 86418f90a4c1 ("drm: convert drivers to use of_graph_get_remote_node") Signed-off-by: Miaoqian Lin Reviewed-by: Dmitry Baryshkov Reviewed-by: Stephen Boyd Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/488473/ Link: https://lore.kernel.org/r/20220607110841.53889-1-linmq006@gmail.com Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index cdcaf470f14805..97ae68182f3edb 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -223,6 +223,7 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms, encoder = mdp4_lcdc_encoder_init(dev, panel_node); if (IS_ERR(encoder)) { DRM_DEV_ERROR(dev->dev, "failed to construct LCDC encoder\n"); + of_node_put(panel_node); return PTR_ERR(encoder); } @@ -232,6 +233,7 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms, connector = mdp4_lvds_connector_init(dev, panel_node, encoder); if (IS_ERR(connector)) { DRM_DEV_ERROR(dev->dev, "failed to initialize LVDS connector\n"); + of_node_put(panel_node); return PTR_ERR(connector); } From d0b4a61f8713fe4d4b5cd7c525135cf31ad10fe9 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Mon, 6 Jun 2022 10:55:39 -0700 Subject: [PATCH 0055/1056] drm/msm/dp: check core_initialized before disable interrupts at dp_display_unbind() [ Upstream commit d80c3ba0ac247791a4ed7a0cd865a64906c8906a ] During msm initialize phase, dp_display_unbind() will be called to undo initializations had been done by dp_display_bind() previously if there is error happen at msm_drm_bind. In this case, core_initialized flag had to be check to make sure clocks is on before update DP controller register to disable HPD interrupts. Otherwise system will crash due to below NOC fatal error. QTISECLIB [01f01a7ad]CNOC2 ERROR: ERRLOG0_LOW = 0x00061007 QTISECLIB [01f01a7ad]GEM_NOC ERROR: ERRLOG0_LOW = 0x00001007 QTISECLIB [01f0371a0]CNOC2 ERROR: ERRLOG0_HIGH = 0x00000003 QTISECLIB [01f055297]GEM_NOC ERROR: ERRLOG0_HIGH = 0x00000003 QTISECLIB [01f072beb]CNOC2 ERROR: ERRLOG1_LOW = 0x00000024 QTISECLIB [01f0914b8]GEM_NOC ERROR: ERRLOG1_LOW = 0x00000042 QTISECLIB [01f0ae639]CNOC2 ERROR: ERRLOG1_HIGH = 0x00004002 QTISECLIB [01f0cc73f]GEM_NOC ERROR: ERRLOG1_HIGH = 0x00004002 QTISECLIB [01f0ea092]CNOC2 ERROR: ERRLOG2_LOW = 0x0009020c QTISECLIB [01f10895f]GEM_NOC ERROR: ERRLOG2_LOW = 0x0ae9020c QTISECLIB [01f125ae1]CNOC2 ERROR: ERRLOG2_HIGH = 0x00000000 QTISECLIB [01f143be7]GEM_NOC ERROR: ERRLOG2_HIGH = 0x00000000 QTISECLIB [01f16153a]CNOC2 ERROR: ERRLOG3_LOW = 0x00000000 QTISECLIB [01f17fe07]GEM_NOC ERROR: ERRLOG3_LOW = 0x00000000 QTISECLIB [01f19cf89]CNOC2 ERROR: ERRLOG3_HIGH = 0x00000000 QTISECLIB [01f1bb08e]GEM_NOC ERROR: ERRLOG3_HIGH = 0x00000000 QTISECLIB [01f1d8a31]CNOC2 ERROR: SBM1 FAULTINSTATUS0_LOW = 0x00000002 QTISECLIB [01f1f72a4]GEM_NOC ERROR: SBM0 FAULTINSTATUS0_LOW = 0x00000001 QTISECLIB [01f21a217]CNOC3 ERROR: ERRLOG0_LOW = 0x00000006 QTISECLIB [01f23dfd3]NOC error fatal changes in v2: -- drop the first patch (drm/msm: enable msm irq after all initializations are done successfully at msm_drm_init()) since the problem had been fixed by other patch Fixes: 570d3e5d28db ("drm/msm/dp: stop event kernel thread when DP unbind") Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Patchwork: https://patchwork.freedesktop.org/patch/488387/ Link: https://lore.kernel.org/r/1654538139-7450-1-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dp/dp_display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 7b624191abf196..8b51a5cc3eb8d9 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -260,7 +260,8 @@ static void dp_display_unbind(struct device *dev, struct device *master, struct dp_display_private, dp_display); /* disable all HPD interrupts */ - dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false); + if (dp->core_initialized) + dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false); kthread_stop(dp->ev_tsk); From 2ecf5ff9aa14600536d5790451afd5ebba29d739 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Sat, 6 Nov 2021 10:22:46 -0700 Subject: [PATCH 0056/1056] drm/msm/dp: Drop now unused hpd_high member [ Upstream commit fabae667b1263216be53e0230cd3966a9a1963a4 ] Since '8ede2ecc3e5e ("drm/msm/dp: Add DP compliance tests on Snapdragon Chipsets")' the hpd_high member of struct dp_usbpd has been write-only. Let's clean up the code a little bit by removing the writes as well. Signed-off-by: Bjorn Andersson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20211106172246.2597431-1-bjorn.andersson@linaro.org Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dp/dp_display.c | 6 ------ drivers/gpu/drm/msm/dp/dp_hpd.c | 2 -- drivers/gpu/drm/msm/dp/dp_hpd.h | 2 -- 3 files changed, 10 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 8b51a5cc3eb8d9..a66ee63253a314 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -547,11 +547,8 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) dp->hpd_state = ST_CONNECT_PENDING; - hpd->hpd_high = 1; - ret = dp_display_usbpd_configure_cb(&dp->pdev->dev); if (ret) { /* link train failed */ - hpd->hpd_high = 0; dp->hpd_state = ST_DISCONNECTED; if (ret == -ECONNRESET) { /* cable unplugged */ @@ -628,7 +625,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) /* triggered by irq_hdp with sink_count = 0 */ if (dp->link->sink_count == 0) { dp_ctrl_off_phy(dp->ctrl); - hpd->hpd_high = 0; dp->core_initialized = false; } mutex_unlock(&dp->event_mutex); @@ -652,8 +648,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) /* disable HPD plug interrupts */ dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK, false); - hpd->hpd_high = 0; - /* * We don't need separate work for disconnect as * connect/attention interrupts are disabled diff --git a/drivers/gpu/drm/msm/dp/dp_hpd.c b/drivers/gpu/drm/msm/dp/dp_hpd.c index e1c90fa47411f7..db98a1d431eb6a 100644 --- a/drivers/gpu/drm/msm/dp/dp_hpd.c +++ b/drivers/gpu/drm/msm/dp/dp_hpd.c @@ -32,8 +32,6 @@ int dp_hpd_connect(struct dp_usbpd *dp_usbpd, bool hpd) hpd_priv = container_of(dp_usbpd, struct dp_hpd_private, dp_usbpd); - dp_usbpd->hpd_high = hpd; - if (!hpd_priv->dp_cb || !hpd_priv->dp_cb->configure || !hpd_priv->dp_cb->disconnect) { pr_err("hpd dp_cb not initialized\n"); diff --git a/drivers/gpu/drm/msm/dp/dp_hpd.h b/drivers/gpu/drm/msm/dp/dp_hpd.h index 5bc5bb64680fb0..8feec5aa502718 100644 --- a/drivers/gpu/drm/msm/dp/dp_hpd.h +++ b/drivers/gpu/drm/msm/dp/dp_hpd.h @@ -26,7 +26,6 @@ enum plug_orientation { * @multi_func: multi-function preferred * @usb_config_req: request to switch to usb * @exit_dp_mode: request exit from displayport mode - * @hpd_high: Hot Plug Detect signal is high. * @hpd_irq: Change in the status since last message * @alt_mode_cfg_done: bool to specify alt mode status * @debug_en: bool to specify debug mode @@ -39,7 +38,6 @@ struct dp_usbpd { bool multi_func; bool usb_config_req; bool exit_dp_mode; - bool hpd_high; bool hpd_irq; bool alt_mode_cfg_done; bool debug_en; From 40e9efdc2ef9e6bbd7712936606137ec3c94e32b Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Wed, 8 Dec 2021 09:41:02 -0800 Subject: [PATCH 0057/1056] drm/msm/dp: dp_link_parse_sink_count() return immediately if aux read failed [ Upstream commit f61550b3864b9578527c28cf9c465316ac1566e1 ] Add checking aux read/write status at both dp_link_parse_sink_count() and dp_link_parse_sink_status_filed() to avoid long timeout delay if dp aux read/write failed at timeout due to cable unplugged. Changes in V4: -- split this patch as stand alone patch Changes in v5: -- rebase on msm-next branch Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Tested-by: Stephen Boyd Link: https://lore.kernel.org/r/1638985262-2072-1-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dp/dp_display.c | 12 +++++++++--- drivers/gpu/drm/msm/dp/dp_link.c | 19 ++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index a66ee63253a314..32b8dbb917bfde 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -712,9 +712,15 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) return 0; } - ret = dp_display_usbpd_attention_cb(&dp->pdev->dev); - if (ret == -ECONNRESET) { /* cable unplugged */ - dp->core_initialized = false; + /* + * dp core (ahb/aux clks) must be initialized before + * irq_hpd be handled + */ + if (dp->core_initialized) { + ret = dp_display_usbpd_attention_cb(&dp->pdev->dev); + if (ret == -ECONNRESET) { /* cable unplugged */ + dp->core_initialized = false; + } } DRM_DEBUG_DP("hpd_state=%d\n", state); diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c index a5bdfc5029deeb..d4d31e5bda0709 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.c +++ b/drivers/gpu/drm/msm/dp/dp_link.c @@ -737,18 +737,25 @@ static int dp_link_parse_sink_count(struct dp_link *dp_link) return 0; } -static void dp_link_parse_sink_status_field(struct dp_link_private *link) +static int dp_link_parse_sink_status_field(struct dp_link_private *link) { int len = 0; link->prev_sink_count = link->dp_link.sink_count; - dp_link_parse_sink_count(&link->dp_link); + len = dp_link_parse_sink_count(&link->dp_link); + if (len < 0) { + DRM_ERROR("DP parse sink count failed\n"); + return len; + } len = drm_dp_dpcd_read_link_status(link->aux, link->link_status); - if (len < DP_LINK_STATUS_SIZE) + if (len < DP_LINK_STATUS_SIZE) { DRM_ERROR("DP link status read failed\n"); - dp_link_parse_request(link); + return len; + } + + return dp_link_parse_request(link); } /** @@ -1023,7 +1030,9 @@ int dp_link_process_request(struct dp_link *dp_link) dp_link_reset_data(link); - dp_link_parse_sink_status_field(link); + ret = dp_link_parse_sink_status_field(link); + if (ret) + return ret; if (link->request.test_requested == DP_TEST_LINK_EDID_READ) { dp_link->sink_request |= DP_TEST_LINK_EDID_READ; From 796d3acd7d9ed88a0309d123f6f6b235136043c5 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Tue, 18 Jan 2022 10:47:25 -0800 Subject: [PATCH 0058/1056] drm/msm/dp: do not initialize phy until plugin interrupt received [ Upstream commit 989ebe7bc4463002c210db0010c8475797a9098f ] Current DP drivers have regulators, clocks, irq and phy are grouped together within a function and executed not in a symmetric manner. This increase difficulty of code maintenance and limited code scalability. This patch divides the driver life cycle of operation into four states, resume (including booting up), dongle plugin, dongle unplugged and suspend. Regulators, core clocks and irq are grouped together and enabled at resume (or booting up) so that the DP controller is armed and ready to receive HPD plugin interrupts. HPD plugin interrupt is generated when a dongle plugs into DUT (device under test). Once HPD plugin interrupt is received, DP controller will initialize phy so that dpcd read/write will function and following link training can be proceeded successfully. DP phy will be disabled after main link is teared down at end of unplugged HPD interrupt handle triggered by dongle unplugged out of DUT. Finally regulators, code clocks and irq are disabled at corresponding suspension. Changes in V2: -- removed unnecessary dp_ctrl NULL check -- removed unnecessary phy init_count and power_count DRM_DEBUG_DP logs -- remove flip parameter out of dp_ctrl_irq_enable() -- add fixes tag Changes in V3: -- call dp_display_host_phy_init() instead of dp_ctrl_phy_init() at dp_display_host_init() for eDP Changes in V4: -- rewording commit text to match this commit changes Changes in V5: -- rebase on top of msm-next branch Changes in V6: -- delete flip variable Changes in V7: -- dp_ctrl_irq_enable/disabe() merged into dp_ctrl_reset_irq_ctrl() Changes in V8: -- add more detail comment regrading dp phy at dp_display_host_init() Changes in V9: -- remove set phy_initialized to false when -ECONNRESET detected Changes in v10: -- group into one series Changes in v11: -- drop drm/msm/dp: dp_link_parse_sink_count() return immediately if aux read Changes in v12: -- move dp_display_host_phy_exit() after dp_display_host_deinit() Changes in v13: -- do not execute phy_init until plugged_in interrupt for edp, same as DP. Changes in v14: -- remove redundant dp->core_initialized = false form dp_pm_suspend. Changes in v15: -- remove core_initialized flag check at both host_init and host_deinit Changes in v16: -- remove dp_display_host_phy_exit core_initialized=false at dp_pm_suspend Changes in v17: -- remove core_initialized checking before execute attention_cb() Changes in v18: -- remove core_initialized checking at dp_pm_suspend Fixes: 8ede2ecc3e5e ("drm/msm/dp: Add DP compliance tests on Snapdragon Chipsets") Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/1642531648-8448-2-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dp/dp_ctrl.c | 80 ++++++++------------ drivers/gpu/drm/msm/dp/dp_ctrl.h | 8 +- drivers/gpu/drm/msm/dp/dp_display.c | 111 ++++++++++++++-------------- 3 files changed, 92 insertions(+), 107 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 1992347537e659..0776b1960f2181 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1348,60 +1348,44 @@ static int dp_ctrl_enable_stream_clocks(struct dp_ctrl_private *ctrl) return ret; } -int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip, bool reset) +void dp_ctrl_reset_irq_ctrl(struct dp_ctrl *dp_ctrl, bool enable) +{ + struct dp_ctrl_private *ctrl; + + ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); + + dp_catalog_ctrl_reset(ctrl->catalog); + + if (enable) + dp_catalog_ctrl_enable_irq(ctrl->catalog, enable); +} + +void dp_ctrl_phy_init(struct dp_ctrl *dp_ctrl) { struct dp_ctrl_private *ctrl; struct dp_io *dp_io; struct phy *phy; - if (!dp_ctrl) { - DRM_ERROR("Invalid input data\n"); - return -EINVAL; - } - ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); dp_io = &ctrl->parser->io; phy = dp_io->phy; - ctrl->dp_ctrl.orientation = flip; - - if (reset) - dp_catalog_ctrl_reset(ctrl->catalog); - - DRM_DEBUG_DP("flip=%d\n", flip); dp_catalog_ctrl_phy_reset(ctrl->catalog); phy_init(phy); - dp_catalog_ctrl_enable_irq(ctrl->catalog, true); - - return 0; } -/** - * dp_ctrl_host_deinit() - Uninitialize DP controller - * @dp_ctrl: Display Port Driver data - * - * Perform required steps to uninitialize DP controller - * and its resources. - */ -void dp_ctrl_host_deinit(struct dp_ctrl *dp_ctrl) +void dp_ctrl_phy_exit(struct dp_ctrl *dp_ctrl) { struct dp_ctrl_private *ctrl; struct dp_io *dp_io; struct phy *phy; - if (!dp_ctrl) { - DRM_ERROR("Invalid input data\n"); - return; - } - ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); dp_io = &ctrl->parser->io; phy = dp_io->phy; - dp_catalog_ctrl_enable_irq(ctrl->catalog, false); + dp_catalog_ctrl_phy_reset(ctrl->catalog); phy_exit(phy); - - DRM_DEBUG_DP("Host deinitialized successfully\n"); } static bool dp_ctrl_use_fixed_nvid(struct dp_ctrl_private *ctrl) @@ -1471,7 +1455,10 @@ static int dp_ctrl_deinitialize_mainlink(struct dp_ctrl_private *ctrl) } phy_power_off(phy); + + /* aux channel down, reinit phy */ phy_exit(phy); + phy_init(phy); return 0; } @@ -1877,8 +1864,14 @@ int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl) return ret; } + DRM_DEBUG_DP("Before, phy=%x init_count=%d power_on=%d\n", + (u32)(uintptr_t)phy, phy->init_count, phy->power_count); + phy_power_off(phy); + DRM_DEBUG_DP("After, phy=%x init_count=%d power_on=%d\n", + (u32)(uintptr_t)phy, phy->init_count, phy->power_count); + /* aux channel down, reinit phy */ phy_exit(phy); phy_init(phy); @@ -1887,23 +1880,6 @@ int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl) return ret; } -void dp_ctrl_off_phy(struct dp_ctrl *dp_ctrl) -{ - struct dp_ctrl_private *ctrl; - struct dp_io *dp_io; - struct phy *phy; - - ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); - dp_io = &ctrl->parser->io; - phy = dp_io->phy; - - dp_catalog_ctrl_reset(ctrl->catalog); - - phy_exit(phy); - - DRM_DEBUG_DP("DP off phy done\n"); -} - int dp_ctrl_off(struct dp_ctrl *dp_ctrl) { struct dp_ctrl_private *ctrl; @@ -1931,10 +1907,14 @@ int dp_ctrl_off(struct dp_ctrl *dp_ctrl) DRM_ERROR("Failed to disable link clocks. ret=%d\n", ret); } + DRM_DEBUG_DP("Before, phy=%x init_count=%d power_on=%d\n", + (u32)(uintptr_t)phy, phy->init_count, phy->power_count); + phy_power_off(phy); - phy_exit(phy); - DRM_DEBUG_DP("DP off done\n"); + DRM_DEBUG_DP("After, phy=%x init_count=%d power_on=%d\n", + (u32)(uintptr_t)phy, phy->init_count, phy->power_count); + return ret; } diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.h b/drivers/gpu/drm/msm/dp/dp_ctrl.h index 2363a2df9597b5..2433edbc70a6d4 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.h +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.h @@ -19,12 +19,9 @@ struct dp_ctrl { u32 pixel_rate; }; -int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip, bool reset); -void dp_ctrl_host_deinit(struct dp_ctrl *dp_ctrl); int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl); int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl); int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl); -void dp_ctrl_off_phy(struct dp_ctrl *dp_ctrl); int dp_ctrl_off(struct dp_ctrl *dp_ctrl); void dp_ctrl_push_idle(struct dp_ctrl *dp_ctrl); void dp_ctrl_isr(struct dp_ctrl *dp_ctrl); @@ -34,4 +31,9 @@ struct dp_ctrl *dp_ctrl_get(struct device *dev, struct dp_link *link, struct dp_power *power, struct dp_catalog *catalog, struct dp_parser *parser); +void dp_ctrl_reset_irq_ctrl(struct dp_ctrl *dp_ctrl, bool enable); +void dp_ctrl_phy_init(struct dp_ctrl *dp_ctrl); +void dp_ctrl_phy_exit(struct dp_ctrl *dp_ctrl); +void dp_ctrl_irq_phy_exit(struct dp_ctrl *dp_ctrl); + #endif /* _DP_CTRL_H_ */ diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 32b8dbb917bfde..bbd0bf8201920f 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -81,6 +81,7 @@ struct dp_display_private { /* state variables */ bool core_initialized; + bool phy_initialized; bool hpd_irq_on; bool audio_supported; @@ -362,36 +363,45 @@ static int dp_display_process_hpd_high(struct dp_display_private *dp) return rc; } -static void dp_display_host_init(struct dp_display_private *dp, int reset) +static void dp_display_host_phy_init(struct dp_display_private *dp) { - bool flip = false; + DRM_DEBUG_DP("core_init=%d phy_init=%d\n", + dp->core_initialized, dp->phy_initialized); - DRM_DEBUG_DP("core_initialized=%d\n", dp->core_initialized); - if (dp->core_initialized) { - DRM_DEBUG_DP("DP core already initialized\n"); - return; + if (!dp->phy_initialized) { + dp_ctrl_phy_init(dp->ctrl); + dp->phy_initialized = true; } +} + +static void dp_display_host_phy_exit(struct dp_display_private *dp) +{ + DRM_DEBUG_DP("core_init=%d phy_init=%d\n", + dp->core_initialized, dp->phy_initialized); - if (dp->usbpd->orientation == ORIENTATION_CC2) - flip = true; + if (dp->phy_initialized) { + dp_ctrl_phy_exit(dp->ctrl); + dp->phy_initialized = false; + } +} + +static void dp_display_host_init(struct dp_display_private *dp) +{ + DRM_DEBUG_DP("core_initialized=%d\n", dp->core_initialized); - dp_power_init(dp->power, flip); - dp_ctrl_host_init(dp->ctrl, flip, reset); + dp_power_init(dp->power, false); + dp_ctrl_reset_irq_ctrl(dp->ctrl, true); dp_aux_init(dp->aux); dp->core_initialized = true; } static void dp_display_host_deinit(struct dp_display_private *dp) { - if (!dp->core_initialized) { - DRM_DEBUG_DP("DP core not initialized\n"); - return; - } + DRM_DEBUG_DP("core_initialized=%d\n", dp->core_initialized); - dp_ctrl_host_deinit(dp->ctrl); + dp_ctrl_reset_irq_ctrl(dp->ctrl, false); dp_aux_deinit(dp->aux); dp_power_deinit(dp->power); - dp->core_initialized = false; } @@ -409,7 +419,7 @@ static int dp_display_usbpd_configure_cb(struct device *dev) dp = container_of(g_dp_display, struct dp_display_private, dp_display); - dp_display_host_init(dp, false); + dp_display_host_phy_init(dp); rc = dp_display_process_hpd_high(dp); end: @@ -550,11 +560,6 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) ret = dp_display_usbpd_configure_cb(&dp->pdev->dev); if (ret) { /* link train failed */ dp->hpd_state = ST_DISCONNECTED; - - if (ret == -ECONNRESET) { /* cable unplugged */ - dp->core_initialized = false; - } - } else { /* start sentinel checking in case of missing uevent */ dp_add_event(dp, EV_CONNECT_PENDING_TIMEOUT, 0, tout); @@ -624,8 +629,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) if (state == ST_DISCONNECTED) { /* triggered by irq_hdp with sink_count = 0 */ if (dp->link->sink_count == 0) { - dp_ctrl_off_phy(dp->ctrl); - dp->core_initialized = false; + dp_display_host_phy_exit(dp); } mutex_unlock(&dp->event_mutex); return 0; @@ -687,7 +691,6 @@ static int dp_disconnect_pending_timeout(struct dp_display_private *dp, u32 data static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) { u32 state; - int ret; mutex_lock(&dp->event_mutex); @@ -712,16 +715,8 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) return 0; } - /* - * dp core (ahb/aux clks) must be initialized before - * irq_hpd be handled - */ - if (dp->core_initialized) { - ret = dp_display_usbpd_attention_cb(&dp->pdev->dev); - if (ret == -ECONNRESET) { /* cable unplugged */ - dp->core_initialized = false; - } - } + dp_display_usbpd_attention_cb(&dp->pdev->dev); + DRM_DEBUG_DP("hpd_state=%d\n", state); mutex_unlock(&dp->event_mutex); @@ -916,12 +911,19 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data) dp_display->audio_enabled = false; - /* triggered by irq_hpd with sink_count = 0 */ if (dp->link->sink_count == 0) { + /* + * irq_hpd with sink_count = 0 + * hdmi unplugged out of dongle + */ dp_ctrl_off_link_stream(dp->ctrl); } else { + /* + * unplugged interrupt + * dongle unplugged out of DUT + */ dp_ctrl_off(dp->ctrl); - dp->core_initialized = false; + dp_display_host_phy_exit(dp); } dp_display->power_on = false; @@ -1051,7 +1053,7 @@ void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp) static void dp_display_config_hpd(struct dp_display_private *dp) { - dp_display_host_init(dp, true); + dp_display_host_init(dp); dp_catalog_ctrl_hpd_config(dp->catalog); /* Enable interrupt first time @@ -1318,20 +1320,23 @@ static int dp_pm_resume(struct device *dev) dp->hpd_state = ST_DISCONNECTED; /* turn on dp ctrl/phy */ - dp_display_host_init(dp, true); + dp_display_host_init(dp); dp_catalog_ctrl_hpd_config(dp->catalog); - /* - * set sink to normal operation mode -- D0 - * before dpcd read - */ - dp_link_psm_config(dp->link, &dp->panel->link_info, false); if (dp_catalog_link_is_connected(dp->catalog)) { + /* + * set sink to normal operation mode -- D0 + * before dpcd read + */ + dp_display_host_phy_init(dp); + dp_link_psm_config(dp->link, &dp->panel->link_info, false); sink_count = drm_dp_read_sink_count(dp->aux); if (sink_count < 0) sink_count = 0; + + dp_display_host_phy_exit(dp); } dp->link->sink_count = sink_count; @@ -1370,18 +1375,16 @@ static int dp_pm_suspend(struct device *dev) DRM_DEBUG_DP("Before, core_inited=%d power_on=%d\n", dp->core_initialized, dp_display->power_on); - if (dp->core_initialized == true) { - /* mainlink enabled */ - if (dp_power_clk_status(dp->power, DP_CTRL_PM)) - dp_ctrl_off_link_stream(dp->ctrl); + /* mainlink enabled */ + if (dp_power_clk_status(dp->power, DP_CTRL_PM)) + dp_ctrl_off_link_stream(dp->ctrl); - dp_display_host_deinit(dp); - } - - dp->hpd_state = ST_SUSPENDED; + dp_display_host_phy_exit(dp); /* host_init will be called at pm_resume */ - dp->core_initialized = false; + dp_display_host_deinit(dp); + + dp->hpd_state = ST_SUSPENDED; DRM_DEBUG_DP("After, core_inited=%d power_on=%d\n", dp->core_initialized, dp_display->power_on); @@ -1538,7 +1541,7 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder) state = dp_display->hpd_state; if (state == ST_DISPLAY_OFF) - dp_display_host_init(dp_display, true); + dp_display_host_phy_init(dp_display); dp_display_enable(dp_display, 0); From e24709e89b1b0840647ddbdd7d9e129ba8d47904 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Thu, 16 Jun 2022 13:26:40 -0700 Subject: [PATCH 0059/1056] drm/msm/dp: force link training for display resolution change [ Upstream commit a6e2af64a79afa7f1b29375b5231e840a84bb845 ] Display resolution change is implemented through drm modeset. Older modeset (resolution) has to be disabled first before newer modeset (resolution) can be enabled. Display disable will turn off both pixel clock and main link clock so that main link have to be re-trained during display enable to have new video stream flow again. At current implementation, display enable function manually kicks up irq_hpd_handle which will read panel link status and start link training if link status is not in sync state. However, there is rare case that a particular panel links status keep staying in sync for some period of time after main link had been shut down previously at display disabled. In this case, main link retraining will not be executed by irq_hdp_handle(). Hence video stream of newer display resolution will fail to be transmitted to panel due to main link is not in sync between host and panel. This patch will bypass irq_hpd_handle() in favor of directly call dp_ctrl_on_stream() to always perform link training in regardless of main link status. So that no unexpected exception resolution change failure cases will happen. Also this implementation are more efficient than manual kicking off irq_hpd_handle function. Changes in v2: -- set force_link_train flag on DP only (is_edp == false) Changes in v3: -- revise commit text -- add Fixes tag Changes in v4: -- revise commit text Changes in v5: -- fix spelling at commit text Changes in v6: -- split dp_ctrl_on_stream() for phy test case -- revise commit text for modeset Changes in v7: -- drop 0 assignment at local variable (ret = 0) Changes in v8: -- add patch to remove pixel_rate from dp_ctrl Changes in v9: -- forward declare dp_ctrl_on_stream_phy_test_report() Fixes: 62671d2ef24b ("drm/msm/dp: fixes wrong connection state caused by failure of link train") Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Patchwork: https://patchwork.freedesktop.org/patch/489895/ Link: https://lore.kernel.org/r/1655411200-7255-1-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dp/dp_ctrl.c | 33 ++++++++++++++++++++++------- drivers/gpu/drm/msm/dp/dp_ctrl.h | 2 +- drivers/gpu/drm/msm/dp/dp_display.c | 13 ++++++------ 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 0776b1960f2181..afffdad0ebf923 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1488,6 +1488,8 @@ static int dp_ctrl_link_maintenance(struct dp_ctrl_private *ctrl) return ret; } +static int dp_ctrl_on_stream_phy_test_report(struct dp_ctrl *dp_ctrl); + static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl) { int ret = 0; @@ -1510,7 +1512,7 @@ static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl) ret = dp_ctrl_on_link(&ctrl->dp_ctrl); if (!ret) - ret = dp_ctrl_on_stream(&ctrl->dp_ctrl); + ret = dp_ctrl_on_stream_phy_test_report(&ctrl->dp_ctrl); else DRM_ERROR("failed to enable DP link controller\n"); @@ -1765,7 +1767,27 @@ static int dp_ctrl_link_retrain(struct dp_ctrl_private *ctrl) return dp_ctrl_setup_main_link(ctrl, &training_step); } -int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl) +static int dp_ctrl_on_stream_phy_test_report(struct dp_ctrl *dp_ctrl) +{ + int ret; + struct dp_ctrl_private *ctrl; + + ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); + + ctrl->dp_ctrl.pixel_rate = ctrl->panel->dp_mode.drm_mode.clock; + + ret = dp_ctrl_enable_stream_clocks(ctrl); + if (ret) { + DRM_ERROR("Failed to start pixel clocks. ret=%d\n", ret); + return ret; + } + + dp_ctrl_send_phy_test_pattern(ctrl); + + return 0; +} + +int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl, bool force_link_train) { int ret = 0; bool mainlink_ready = false; @@ -1796,12 +1818,7 @@ int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl) goto end; } - if (ctrl->link->sink_request & DP_TEST_LINK_PHY_TEST_PATTERN) { - dp_ctrl_send_phy_test_pattern(ctrl); - return 0; - } - - if (!dp_ctrl_channel_eq_ok(ctrl)) + if (force_link_train || !dp_ctrl_channel_eq_ok(ctrl)) dp_ctrl_link_retrain(ctrl); /* stop txing train pattern to end link training */ diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.h b/drivers/gpu/drm/msm/dp/dp_ctrl.h index 2433edbc70a6d4..dcc7af21a5f050 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.h +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.h @@ -20,7 +20,7 @@ struct dp_ctrl { }; int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl); -int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl); +int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl, bool force_link_train); int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl); int dp_ctrl_off(struct dp_ctrl *dp_ctrl); void dp_ctrl_push_idle(struct dp_ctrl *dp_ctrl); diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index bbd0bf8201920f..b141ccb527b005 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -865,7 +865,7 @@ static int dp_display_enable(struct dp_display_private *dp, u32 data) return 0; } - rc = dp_ctrl_on_stream(dp->ctrl); + rc = dp_ctrl_on_stream(dp->ctrl, data); if (!rc) dp_display->power_on = true; @@ -1512,6 +1512,7 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder) int rc = 0; struct dp_display_private *dp_display; u32 state; + bool force_link_train = false; dp_display = container_of(dp, struct dp_display_private, dp_display); if (!dp_display->dp_mode.drm_mode.clock) { @@ -1540,10 +1541,12 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder) state = dp_display->hpd_state; - if (state == ST_DISPLAY_OFF) + if (state == ST_DISPLAY_OFF) { dp_display_host_phy_init(dp_display); + force_link_train = true; + } - dp_display_enable(dp_display, 0); + dp_display_enable(dp_display, force_link_train); rc = dp_display_post_enable(dp); if (rc) { @@ -1552,10 +1555,6 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder) dp_display_unprepare(dp); } - /* manual kick off plug event to train link */ - if (state == ST_DISPLAY_OFF) - dp_add_event(dp_display, EV_IRQ_HPD_INT, 0, 0); - /* completed connection */ dp_display->hpd_state = ST_CONNECTED; From 26e70f8989cb8dbdfe65d8b13824e67360fb2353 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 17 May 2022 02:03:25 +0000 Subject: [PATCH 0060/1056] perf arm-spe: Don't set data source if it's not a memory operation [ Upstream commit 51ba539f5bdb5a8cc7b1dedd5e73ac54564a7602 ] Except for memory load and store operations, ARM SPE records also can support other operation types, bug when set the data source field the current code assumes a record is a either load operation or store operation, this leads to wrongly synthesize memory samples. This patch strictly checks the record operation type, it only sets data source only for the operation types ARM_SPE_LD and ARM_SPE_ST, otherwise, returns zero for data source. Therefore, we can synthesize memory samples only when data source is a non-zero value, the function arm_spe__is_memory_event() is useless and removed. Fixes: e55ed3423c1bb29f ("perf arm-spe: Synthesize memory event") Reviewed-by: Ali Saidi Reviewed-by: German Gomez Signed-off-by: Leo Yan Tested-by: Ali Saidi Cc: Alexander Shishkin Cc: alisaidi@amazon.com Cc: Andrew Kilroy Cc: Benjamin Herrenschmidt Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Leo Yan Cc: Li Huafei Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick Forrington Cc: Peter Zijlstra Cc: Will Deacon Link: http://lore.kernel.org/lkml/20220517020326.18580-5-alisaidi@amazon.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/arm-spe.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 235549bb28b94f..569e1b8ad0abc5 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -312,26 +312,16 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, return arm_spe_deliver_synth_event(spe, speq, event, &sample); } -#define SPE_MEM_TYPE (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \ - ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \ - ARM_SPE_REMOTE_ACCESS) - -static bool arm_spe__is_memory_event(enum arm_spe_sample_type type) -{ - if (type & SPE_MEM_TYPE) - return true; - - return false; -} - static u64 arm_spe__synth_data_source(const struct arm_spe_record *record) { union perf_mem_data_src data_src = { 0 }; if (record->op == ARM_SPE_LD) data_src.mem_op = PERF_MEM_OP_LOAD; - else + else if (record->op == ARM_SPE_ST) data_src.mem_op = PERF_MEM_OP_STORE; + else + return 0; if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) { data_src.mem_lvl = PERF_MEM_LVL_L3; @@ -435,7 +425,11 @@ static int arm_spe_sample(struct arm_spe_queue *speq) return err; } - if (spe->sample_memory && arm_spe__is_memory_event(record->type)) { + /* + * When data_src is zero it means the record is not a memory operation, + * skip to synthesize memory sample for this case. + */ + if (spe->sample_memory && data_src) { err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src); if (err) return err; From a3b2470399f679587c45abe56e551caf10becca2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Jun 2022 01:35:06 -0700 Subject: [PATCH 0061/1056] erspan: do not assume transport header is always set [ Upstream commit 301bd140ed0b24f0da660874c7e8a47dad8c8222 ] Rewrite tests in ip6erspan_tunnel_xmit() and erspan_fb_xmit() to not assume transport header is set. syzbot reported: WARNING: CPU: 0 PID: 1350 at include/linux/skbuff.h:2911 skb_transport_header include/linux/skbuff.h:2911 [inline] WARNING: CPU: 0 PID: 1350 at include/linux/skbuff.h:2911 ip6erspan_tunnel_xmit+0x15af/0x2eb0 net/ipv6/ip6_gre.c:963 Modules linked in: CPU: 0 PID: 1350 Comm: aoe_tx0 Not tainted 5.19.0-rc2-syzkaller-00160-g274295c6e53f #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-2 04/01/2014 RIP: 0010:skb_transport_header include/linux/skbuff.h:2911 [inline] RIP: 0010:ip6erspan_tunnel_xmit+0x15af/0x2eb0 net/ipv6/ip6_gre.c:963 Code: 0f 47 f0 40 88 b5 7f fe ff ff e8 8c 16 4b f9 89 de bf ff ff ff ff e8 a0 12 4b f9 66 83 fb ff 0f 85 1d f1 ff ff e8 71 16 4b f9 <0f> 0b e9 43 f0 ff ff e8 65 16 4b f9 48 8d 85 30 ff ff ff ba 60 00 RSP: 0018:ffffc90005daf910 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 000000000000ffff RCX: 0000000000000000 RDX: ffff88801f032100 RSI: ffffffff882e8d3f RDI: 0000000000000003 RBP: ffffc90005dafab8 R08: 0000000000000003 R09: 000000000000ffff R10: 000000000000ffff R11: 0000000000000000 R12: ffff888024f21d40 R13: 000000000000a288 R14: 00000000000000b0 R15: ffff888025a2e000 FS: 0000000000000000(0000) GS:ffff88802c800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2e425000 CR3: 000000006d099000 CR4: 0000000000152ef0 Call Trace: __netdev_start_xmit include/linux/netdevice.h:4805 [inline] netdev_start_xmit include/linux/netdevice.h:4819 [inline] xmit_one net/core/dev.c:3588 [inline] dev_hard_start_xmit+0x188/0x880 net/core/dev.c:3604 sch_direct_xmit+0x19f/0xbe0 net/sched/sch_generic.c:342 __dev_xmit_skb net/core/dev.c:3815 [inline] __dev_queue_xmit+0x14a1/0x3900 net/core/dev.c:4219 dev_queue_xmit include/linux/netdevice.h:2994 [inline] tx+0x6a/0xc0 drivers/block/aoe/aoenet.c:63 kthread+0x1e7/0x3b0 drivers/block/aoe/aoecmd.c:1229 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:302 Fixes: d5db21a3e697 ("erspan: auto detect truncated ipv6 packets.") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: William Tu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/ip_gre.c | 15 ++++++++++----- net/ipv6/ip6_gre.c | 15 ++++++++++----- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f23528c7753958..fc74a3e3b3e126 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -524,7 +524,6 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) int tunnel_hlen; int version; int nhoff; - int thoff; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || @@ -558,10 +557,16 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; - thoff = skb_transport_header(skb) - skb_mac_header(skb); - if (skb->protocol == htons(ETH_P_IPV6) && - (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)) - truncate = true; + if (skb->protocol == htons(ETH_P_IPV6)) { + int thoff; + + if (skb_transport_header_was_set(skb)) + thoff = skb_transport_header(skb) - skb_mac_header(skb); + else + thoff = nhoff + sizeof(struct ipv6hdr); + if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) + truncate = true; + } if (version == 1) { erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index a817ac6d975983..70ef4d4ebff482 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -944,7 +944,6 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, __be16 proto; __u32 mtu; int nhoff; - int thoff; if (!pskb_inet_may_pull(skb)) goto tx_err; @@ -965,10 +964,16 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; - thoff = skb_transport_header(skb) - skb_mac_header(skb); - if (skb->protocol == htons(ETH_P_IPV6) && - (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)) - truncate = true; + if (skb->protocol == htons(ETH_P_IPV6)) { + int thoff; + + if (skb_transport_header_was_set(skb)) + thoff = skb_transport_header(skb) - skb_mac_header(skb); + else + thoff = nhoff + sizeof(struct ipv6hdr); + if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) + truncate = true; + } if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen)) goto tx_err; From be64f54a0db2dbf399f3bca45458d06c833641f0 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Mon, 20 Jun 2022 12:35:08 +0800 Subject: [PATCH 0062/1056] net/tls: fix tls_sk_proto_close executed repeatedly [ Upstream commit 69135c572d1f84261a6de2a1268513a7e71753e2 ] After setting the sock ktls, update ctx->sk_proto to sock->sk_prot by tls_update(), so now ctx->sk_proto->close is tls_sk_proto_close(). When close the sock, tls_sk_proto_close() is called for sock->sk_prot->close is tls_sk_proto_close(). But ctx->sk_proto->close() will be executed later in tls_sk_proto_close(). Thus tls_sk_proto_close() executed repeatedly occurred. That will trigger the following bug. ================================================================= KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] RIP: 0010:tls_sk_proto_close+0xd8/0xaf0 net/tls/tls_main.c:306 Call Trace: tls_sk_proto_close+0x356/0xaf0 net/tls/tls_main.c:329 inet_release+0x12e/0x280 net/ipv4/af_inet.c:428 __sock_release+0xcd/0x280 net/socket.c:650 sock_close+0x18/0x20 net/socket.c:1365 Updating a proto which is same with sock->sk_prot is incorrect. Add proto and sock->sk_prot equality check at the head of tls_update() to fix it. Fixes: 95fa145479fb ("bpf: sockmap/tls, close can race with map free") Reported-by: syzbot+29c3c12f3214b85ad081@syzkaller.appspotmail.com Signed-off-by: Ziyang Xuan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tls/tls_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 9aac9c60d786db..057c1af6182a2a 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -790,6 +790,9 @@ static void tls_update(struct sock *sk, struct proto *p, { struct tls_context *ctx; + if (sk->sk_prot == p) + return; + ctx = tls_get_ctx(sk); if (likely(ctx)) { ctx->sk_write_space = write_space; From 5b45535865d62633e3816ee30eb8d3213038dc17 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 20 Jun 2022 09:15:47 +0200 Subject: [PATCH 0063/1056] udmabuf: add back sanity check [ Upstream commit 05b252cccb2e5c3f56119d25de684b4f810ba40a ] Check vm_fault->pgoff before using it. When we removed the warning, we also removed the check. Fixes: 7b26e4e2119d ("udmabuf: drop WARN_ON() check.") Reported-by: zdi-disclosures@trendmicro.com Suggested-by: Linus Torvalds Signed-off-by: Gerd Hoffmann Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- drivers/dma-buf/udmabuf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index e7330684d3b824..9631f2fd2faf7f 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -32,8 +32,11 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct udmabuf *ubuf = vma->vm_private_data; + pgoff_t pgoff = vmf->pgoff; - vmf->page = ubuf->pages[vmf->pgoff]; + if (pgoff >= ubuf->pagecount) + return VM_FAULT_SIGBUS; + vmf->page = ubuf->pages[pgoff]; get_page(vmf->page); return 0; } From 0315bd8ad0ddcc3c55cca5015777490f92f0db25 Mon Sep 17 00:00:00 2001 From: Jie2x Zhou Date: Thu, 16 Jun 2022 15:40:46 +0800 Subject: [PATCH 0064/1056] selftests: netfilter: correct PKTGEN_SCRIPT_PATHS in nft_concat_range.sh [ Upstream commit 5d79d8af8dec58bf709b3124d09d9572edd9c617 ] Before change: make -C netfilter TEST: performance net,port [SKIP] perf not supported port,net [SKIP] perf not supported net6,port [SKIP] perf not supported port,proto [SKIP] perf not supported net6,port,mac [SKIP] perf not supported net6,port,mac,proto [SKIP] perf not supported net,mac [SKIP] perf not supported After change: net,mac [ OK ] baseline (drop from netdev hook): 2061098pps baseline hash (non-ranged entries): 1606741pps baseline rbtree (match on first field only): 1191607pps set with 1000 full, ranged entries: 1639119pps ok 8 selftests: netfilter: nft_concat_range.sh Fixes: 611973c1e06f ("selftests: netfilter: Introduce tests for sets with range concatenation") Reported-by: kernel test robot Signed-off-by: Jie2x Zhou Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- tools/testing/selftests/netfilter/nft_concat_range.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index b5eef5ffb58e5f..af3461cb5c4096 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -31,7 +31,7 @@ BUGS="flush_remove_add reload" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" - ../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh + ../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh pktgen/pktgen_bench_xmit_mode_netif_receive.sh" # Definition of set types: From ffa12a326415dfe5fc21e66d9d2b86896b4c9eaf Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Wed, 1 Jun 2022 15:53:41 -0400 Subject: [PATCH 0065/1056] xen-blkfront: Handle NULL gendisk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f9710c357e5bbf64d7ce45ba0bc75a52222491c1 ] When a VBD is not fully created and then closed, the kernel can have a NULL pointer dereference: The reproducer is trivial: [user@dom0 ~]$ sudo xl block-attach work backend=sys-usb vdev=xvdi target=/dev/sdz [user@dom0 ~]$ xl block-list work Vdev BE handle state evt-ch ring-ref BE-path 51712 0 241 4 -1 -1 /local/domain/0/backend/vbd/241/51712 51728 0 241 4 -1 -1 /local/domain/0/backend/vbd/241/51728 51744 0 241 4 -1 -1 /local/domain/0/backend/vbd/241/51744 51760 0 241 4 -1 -1 /local/domain/0/backend/vbd/241/51760 51840 3 241 3 -1 -1 /local/domain/3/backend/vbd/241/51840 ^ note state, the /dev/sdz doesn't exist in the backend [user@dom0 ~]$ sudo xl block-detach work xvdi [user@dom0 ~]$ xl block-list work Vdev BE handle state evt-ch ring-ref BE-path work is an invalid domain identifier And its console has: BUG: kernel NULL pointer dereference, address: 0000000000000050 PGD 80000000edebb067 P4D 80000000edebb067 PUD edec2067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 1 PID: 52 Comm: xenwatch Not tainted 5.16.18-2.43.fc32.qubes.x86_64 #1 RIP: 0010:blk_mq_stop_hw_queues+0x5/0x40 Code: 00 48 83 e0 fd 83 c3 01 48 89 85 a8 00 00 00 41 39 5c 24 50 77 c0 5b 5d 41 5c 41 5d c3 c3 0f 1f 80 00 00 00 00 0f 1f 44 00 00 <8b> 47 50 85 c0 74 32 41 54 49 89 fc 55 53 31 db 49 8b 44 24 48 48 RSP: 0018:ffffc90000bcfe98 EFLAGS: 00010293 RAX: ffffffffc0008370 RBX: 0000000000000005 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000005 RDI: 0000000000000000 RBP: ffff88800775f000 R08: 0000000000000001 R09: ffff888006e620b8 R10: ffff888006e620b0 R11: f000000000000000 R12: ffff8880bff39000 R13: ffff8880bff39000 R14: 0000000000000000 R15: ffff88800604be00 FS: 0000000000000000(0000) GS:ffff8880f3300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000050 CR3: 00000000e932e002 CR4: 00000000003706e0 Call Trace: blkback_changed+0x95/0x137 [xen_blkfront] ? read_reply+0x160/0x160 xenwatch_thread+0xc0/0x1a0 ? do_wait_intr_irq+0xa0/0xa0 kthread+0x16b/0x190 ? set_kthread_struct+0x40/0x40 ret_from_fork+0x22/0x30 Modules linked in: snd_seq_dummy snd_hrtimer snd_seq snd_seq_device snd_timer snd soundcore ipt_REJECT nf_reject_ipv4 xt_state xt_conntrack nft_counter nft_chain_nat xt_MASQUERADE nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nft_compat nf_tables nfnetlink intel_rapl_msr intel_rapl_common crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel xen_netfront pcspkr xen_scsiback target_core_mod xen_netback xen_privcmd xen_gntdev xen_gntalloc xen_blkback xen_evtchn ipmi_devintf ipmi_msghandler fuse bpf_preload ip_tables overlay xen_blkfront CR2: 0000000000000050 ---[ end trace 7bc9597fd06ae89d ]--- RIP: 0010:blk_mq_stop_hw_queues+0x5/0x40 Code: 00 48 83 e0 fd 83 c3 01 48 89 85 a8 00 00 00 41 39 5c 24 50 77 c0 5b 5d 41 5c 41 5d c3 c3 0f 1f 80 00 00 00 00 0f 1f 44 00 00 <8b> 47 50 85 c0 74 32 41 54 49 89 fc 55 53 31 db 49 8b 44 24 48 48 RSP: 0018:ffffc90000bcfe98 EFLAGS: 00010293 RAX: ffffffffc0008370 RBX: 0000000000000005 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000005 RDI: 0000000000000000 RBP: ffff88800775f000 R08: 0000000000000001 R09: ffff888006e620b8 R10: ffff888006e620b0 R11: f000000000000000 R12: ffff8880bff39000 R13: ffff8880bff39000 R14: 0000000000000000 R15: ffff88800604be00 FS: 0000000000000000(0000) GS:ffff8880f3300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000050 CR3: 00000000e932e002 CR4: 00000000003706e0 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled info->rq and info->gd are only set in blkfront_connect(), which is called for state 4 (XenbusStateConnected). Guard against using NULL variables in blkfront_closing() to avoid the issue. The rest of blkfront_closing looks okay. If info->nr_rings is 0, then for_each_rinfo won't do anything. blkfront_remove also needs to check for non-NULL pointers before cleaning up the gendisk and request queue. Fixes: 05d69d950d9d "xen-blkfront: sanitize the removal state machine" Reported-by: Marek Marczykowski-Górecki Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220601195341.28581-1-jandryuk@gmail.com Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/block/xen-blkfront.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 390817cf12212f..d7a9bf43fb32ee 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2140,9 +2140,11 @@ static void blkfront_closing(struct blkfront_info *info) return; /* No more blkif_request(). */ - blk_mq_stop_hw_queues(info->rq); - blk_mark_disk_dead(info->gd); - set_capacity(info->gd, 0); + if (info->rq && info->gd) { + blk_mq_stop_hw_queues(info->rq); + blk_mark_disk_dead(info->gd); + set_capacity(info->gd, 0); + } for_each_rinfo(info, rinfo, i) { /* No more gnttab callback work. */ @@ -2478,16 +2480,19 @@ static int blkfront_remove(struct xenbus_device *xbdev) dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); - del_gendisk(info->gd); + if (info->gd) + del_gendisk(info->gd); mutex_lock(&blkfront_mutex); list_del(&info->info_list); mutex_unlock(&blkfront_mutex); blkif_free(info, 0); - xlbd_release_minors(info->gd->first_minor, info->gd->minors); - blk_cleanup_disk(info->gd); - blk_mq_free_tag_set(&info->tag_set); + if (info->gd) { + xlbd_release_minors(info->gd->first_minor, info->gd->minors); + blk_cleanup_disk(info->gd); + blk_mq_free_tag_set(&info->tag_set); + } kfree(info); return 0; From 3e2c9ee9c1e681277778e4b512f3b04bd0928e15 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 17 Jun 2022 11:30:37 +0100 Subject: [PATCH 0066/1056] x86/xen: Remove undefined behavior in setup_features() [ Upstream commit ecb6237fa397b7b810d798ad19322eca466dbab1 ] 1 << 31 is undefined. So switch to 1U << 31. Fixes: 5ead97c84fa7 ("xen: Core Xen implementation") Signed-off-by: Julien Grall Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220617103037.57828-1-julien@xen.org Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/features.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/features.c b/drivers/xen/features.c index 7b591443833c97..87f1828d40d5ed 100644 --- a/drivers/xen/features.c +++ b/drivers/xen/features.c @@ -42,7 +42,7 @@ void xen_setup_features(void) if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) break; for (j = 0; j < 32; j++) - xen_features[i * 32 + j] = !!(fi.submap & 1< Date: Fri, 10 Jun 2022 19:14:20 +0800 Subject: [PATCH 0067/1056] MIPS: Remove repetitive increase irq_err_count [ Upstream commit c81aba8fde2aee4f5778ebab3a1d51bd2ef48e4c ] commit 979934da9e7a ("[PATCH] mips: update IRQ handling for vr41xx") added a function irq_dispatch, and it'll increase irq_err_count when the get_irq callback returns a negative value, but increase irq_err_count in get_irq was not removed. And also, modpost complains once gpio-vr41xx drivers become modules. ERROR: modpost: "irq_err_count" [drivers/gpio/gpio-vr41xx.ko] undefined! So it would be a good idea to remove repetitive increase irq_err_count in get_irq callback. Fixes: 27fdd325dace ("MIPS: Update VR41xx GPIO driver to use gpiolib") Fixes: 979934da9e7a ("[PATCH] mips: update IRQ handling for vr41xx") Reported-by: k2ci Signed-off-by: huhai Signed-off-by: Genjian Zhang Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/vr41xx/common/icu.c | 2 -- drivers/gpio/gpio-vr41xx.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c index 7b7f25b4b057e4..9240bcdbe74e4b 100644 --- a/arch/mips/vr41xx/common/icu.c +++ b/arch/mips/vr41xx/common/icu.c @@ -640,8 +640,6 @@ static int icu_get_irq(unsigned int irq) printk(KERN_ERR "spurious ICU interrupt: %04x,%04x\n", pend1, pend2); - atomic_inc(&irq_err_count); - return -1; } diff --git a/drivers/gpio/gpio-vr41xx.c b/drivers/gpio/gpio-vr41xx.c index 98cd715ccc33cd..8d09b619c16690 100644 --- a/drivers/gpio/gpio-vr41xx.c +++ b/drivers/gpio/gpio-vr41xx.c @@ -217,8 +217,6 @@ static int giu_get_irq(unsigned int irq) printk(KERN_ERR "spurious GIU interrupt: %04x(%04x),%04x(%04x)\n", maskl, pendl, maskh, pendh); - atomic_inc(&irq_err_count); - return -EINVAL; } From 2b2bba96526f25f2eba74ecadb031de2e05a83ce Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 21 Jun 2022 15:59:57 +0100 Subject: [PATCH 0068/1056] afs: Fix dynamic root getattr [ Upstream commit cb78d1b5efffe4cf97e16766329dd7358aed3deb ] The recent patch to make afs_getattr consult the server didn't account for the pseudo-inodes employed by the dynamic root-type afs superblock not having a volume or a server to access, and thus an oops occurs if such a directory is stat'd. Fix this by checking to see if the vnode->volume pointer actually points anywhere before following it in afs_getattr(). This can be tested by stat'ing a directory in /afs. It may be sufficient just to do "ls /afs" and the oops looks something like: BUG: kernel NULL pointer dereference, address: 0000000000000020 ... RIP: 0010:afs_getattr+0x8b/0x14b ... Call Trace: vfs_statx+0x79/0xf5 vfs_fstatat+0x49/0x62 Fixes: 2aeb8c86d499 ("afs: Fix afs_getattr() to refetch file status if callback break occurred") Reported-by: Marc Dionne Signed-off-by: David Howells Reviewed-by: Marc Dionne Tested-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/165408450783.1031787.7941404776393751186.stgit@warthog.procyon.org.uk/ Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/afs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index a47666ba48f569..785bacb972da54 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -733,7 +733,8 @@ int afs_getattr(struct user_namespace *mnt_userns, const struct path *path, _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); - if (!(query_flags & AT_STATX_DONT_SYNC) && + if (vnode->volume && + !(query_flags & AT_STATX_DONT_SYNC) && !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) From 20229bb9954586fdef301f8db78d32fed7d6bcb2 Mon Sep 17 00:00:00 2001 From: Anatolii Gerasymenko Date: Mon, 20 Jun 2022 09:47:05 +0200 Subject: [PATCH 0069/1056] ice: ethtool: advertise 1000M speeds properly [ Upstream commit c3d184c83ff4b80167e34edfc3d21df424bf27ff ] In current implementation ice_update_phy_type enables all link modes for selected speed. This approach doesn't work for 1000M speeds, because both copper (1000baseT) and optical (1000baseX) standards cannot be enabled at once. Fix this, by adding the function `ice_set_phy_type_from_speed()` for 1000M speeds. Fixes: 48cb27f2fd18 ("ice: Implement handlers for ethtool PHY/link operations") Signed-off-by: Anatolii Gerasymenko Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 39 +++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 19f115402969e0..982db894754f7e 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -2150,6 +2150,42 @@ ice_setup_autoneg(struct ice_port_info *p, struct ethtool_link_ksettings *ks, return err; } +/** + * ice_set_phy_type_from_speed - set phy_types based on speeds + * and advertised modes + * @ks: ethtool link ksettings struct + * @phy_type_low: pointer to the lower part of phy_type + * @phy_type_high: pointer to the higher part of phy_type + * @adv_link_speed: targeted link speeds bitmap + */ +static void +ice_set_phy_type_from_speed(const struct ethtool_link_ksettings *ks, + u64 *phy_type_low, u64 *phy_type_high, + u16 adv_link_speed) +{ + /* Handle 1000M speed in a special way because ice_update_phy_type + * enables all link modes, but having mixed copper and optical + * standards is not supported. + */ + adv_link_speed &= ~ICE_AQ_LINK_SPEED_1000MB; + + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 1000baseT_Full)) + *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_T | + ICE_PHY_TYPE_LOW_1G_SGMII; + + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 1000baseKX_Full)) + *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_KX; + + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 1000baseX_Full)) + *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_SX | + ICE_PHY_TYPE_LOW_1000BASE_LX; + + ice_update_phy_type(phy_type_low, phy_type_high, adv_link_speed); +} + /** * ice_set_link_ksettings - Set Speed and Duplex * @netdev: network interface device structure @@ -2286,7 +2322,8 @@ ice_set_link_ksettings(struct net_device *netdev, adv_link_speed = curr_link_speed; /* Convert the advertise link speeds to their corresponded PHY_TYPE */ - ice_update_phy_type(&phy_type_low, &phy_type_high, adv_link_speed); + ice_set_phy_type_from_speed(ks, &phy_type_low, &phy_type_high, + adv_link_speed); if (!autoneg_changed && adv_link_speed == curr_link_speed) { netdev_info(netdev, "Nothing changed, exiting without setting anything.\n"); From b0581f93cf05010549c2abe1c04dc6d6f5cafdf9 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Mon, 20 Jun 2022 21:05:56 +0100 Subject: [PATCH 0070/1056] regmap-irq: Fix a bug in regmap_irq_enable() for type_in_mask chips [ Upstream commit 485037ae9a095491beb7f893c909a76cc4f9d1e7 ] When enabling a type_in_mask irq, the type_buf contents must be AND'd with the mask of the IRQ we're enabling to avoid enabling other IRQs by accident, which can happen if several type_in_mask irqs share a mask register. Fixes: bc998a730367 ("regmap: irq: handle HW using separate rising/falling edge interrupts") Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20220620200644.1961936-2-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/base/regmap/regmap-irq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 4a446259a184e9..cd12078ed51b1f 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -252,6 +252,7 @@ static void regmap_irq_enable(struct irq_data *data) struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data); struct regmap *map = d->map; const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->hwirq); + unsigned int reg = irq_data->reg_offset / map->reg_stride; unsigned int mask, type; type = irq_data->type.type_falling_val | irq_data->type.type_rising_val; @@ -268,14 +269,14 @@ static void regmap_irq_enable(struct irq_data *data) * at the corresponding offset in regmap_irq_set_type(). */ if (d->chip->type_in_mask && type) - mask = d->type_buf[irq_data->reg_offset / map->reg_stride]; + mask = d->type_buf[reg] & irq_data->mask; else mask = irq_data->mask; if (d->chip->clear_on_unmask) d->clear_status = true; - d->mask_buf[irq_data->reg_offset / map->reg_stride] &= ~mask; + d->mask_buf[reg] &= ~mask; } static void regmap_irq_disable(struct irq_data *data) From 60fd29f1ff46370c9c0a72aeedf4f40f2dea9f93 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Mon, 20 Jun 2022 21:05:57 +0100 Subject: [PATCH 0071/1056] regmap-irq: Fix offset/index mismatch in read_sub_irq_data() [ Upstream commit 3f05010f243be06478a9b11cfce0ce994f5a0890 ] We need to divide the sub-irq status register offset by register stride to get an index for the status buffer to avoid an out of bounds write when the register stride is greater than 1. Fixes: a2d21848d921 ("regmap: regmap-irq: Add main status register support") Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20220620200644.1961936-3-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/base/regmap/regmap-irq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index cd12078ed51b1f..3aac960ae30abc 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -387,6 +387,7 @@ static inline int read_sub_irq_data(struct regmap_irq_chip_data *data, subreg = &chip->sub_reg_offsets[b]; for (i = 0; i < subreg->num_regs; i++) { unsigned int offset = subreg->offset[i]; + unsigned int index = offset / map->reg_stride; if (chip->not_fixed_stride) ret = regmap_read(map, @@ -395,7 +396,7 @@ static inline int read_sub_irq_data(struct regmap_irq_chip_data *data, else ret = regmap_read(map, chip->status_base + offset, - &data->status_buf[offset]); + &data->status_buf[index]); if (ret) break; From 8eb0223631f8090ce043fadc57eb6fd223a3044b Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 21 Jun 2022 15:10:56 -0700 Subject: [PATCH 0072/1056] igb: Make DMA faster when CPU is active on the PCIe link [ Upstream commit 4e0effd9007ea0be31f7488611eb3824b4541554 ] Intel I210 on some Intel Alder Lake platforms can only achieve ~750Mbps Tx speed via iperf. The RR2DCDELAY shows around 0x2xxx DMA delay, which will be significantly lower when 1) ASPM is disabled or 2) SoC package c-state stays above PC3. When the RR2DCDELAY is around 0x1xxx the Tx speed can reach to ~950Mbps. According to the I210 datasheet "8.26.1 PCIe Misc. Register - PCIEMISC", "DMA Idle Indication" doesn't seem to tie to DMA coalesce anymore, so set it to 1b for "DMA is considered idle when there is no Rx or Tx AND when there are no TLPs indicating that CPU is active detected on the PCIe link (such as the host executes CSR or Configuration register read or write operation)" and performing Tx should also fall under "active CPU on PCIe link" case. In addition to that, commit b6e0c419f040 ("igb: Move DMA Coalescing init code to separate function.") seems to wrongly changed from enabling E1000_PCIEMISC_LX_DECISION to disabling it, also fix that. Fixes: b6e0c419f040 ("igb: Move DMA Coalescing init code to separate function.") Signed-off-by: Kai-Heng Feng Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20220621221056.604304-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/igb_main.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 5ee5ee8e684826..db11a1c278f699 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -9823,11 +9823,10 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) struct e1000_hw *hw = &adapter->hw; u32 dmac_thr; u16 hwm; + u32 reg; if (hw->mac.type > e1000_82580) { if (adapter->flags & IGB_FLAG_DMAC) { - u32 reg; - /* force threshold to 0. */ wr32(E1000_DMCTXTH, 0); @@ -9860,7 +9859,6 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) /* Disable BMC-to-OS Watchdog Enable */ if (hw->mac.type != e1000_i354) reg &= ~E1000_DMACR_DC_BMC2OSW_EN; - wr32(E1000_DMACR, reg); /* no lower threshold to disable @@ -9877,12 +9875,12 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) */ wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); + } - /* make low power state decision controlled - * by DMA coal - */ + if (hw->mac.type >= e1000_i210 || + (adapter->flags & IGB_FLAG_DMAC)) { reg = rd32(E1000_PCIEMISC); - reg &= ~E1000_PCIEMISC_LX_DECISION; + reg |= E1000_PCIEMISC_LX_DECISION; wr32(E1000_PCIEMISC, reg); } /* endif adapter->dmac is not disabled */ } else if (hw->mac.type == e1000_82580) { From 8d7fe9ad6fddc2af8bde4b921b4f8fab231ed38c Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 21 Jun 2022 13:48:44 +0200 Subject: [PATCH 0073/1056] virtio_net: fix xdp_rxq_info bug after suspend/resume [ Upstream commit 8af52fe9fd3bf5e7478da99193c0632276e1dfce ] The following sequence currently causes a driver bug warning when using virtio_net: # ip link set eth0 up # echo mem > /sys/power/state (or e.g. # rtcwake -s 10 -m mem) # ip link set eth0 down Missing register, driver bug WARNING: CPU: 0 PID: 375 at net/core/xdp.c:138 xdp_rxq_info_unreg+0x58/0x60 Call trace: xdp_rxq_info_unreg+0x58/0x60 virtnet_close+0x58/0xac __dev_close_many+0xac/0x140 __dev_change_flags+0xd8/0x210 dev_change_flags+0x24/0x64 do_setlink+0x230/0xdd0 ... This happens because virtnet_freeze() frees the receive_queue completely (including struct xdp_rxq_info) but does not call xdp_rxq_info_unreg(). Similarly, virtnet_restore() sets up the receive_queue again but does not call xdp_rxq_info_reg(). Actually, parts of virtnet_freeze_down() and virtnet_restore_up() are almost identical to virtnet_close() and virtnet_open(): only the calls to xdp_rxq_info_(un)reg() are missing. This means that we can fix this easily and avoid such problems in the future by just calling virtnet_close()/open() from the freeze/restore handlers. Aside from adding the missing xdp_rxq_info calls the only difference is that the refill work is only cancelled if netif_running(). However, this should not make any functional difference since the refill work should only be active if the network interface is actually up. Fixes: 754b8a21a96d ("virtio_net: setup xdp_rxq_info") Signed-off-by: Stephan Gerhold Acked-by: Jesper Dangaard Brouer Acked-by: Jason Wang Link: https://lore.kernel.org/r/20220621114845.3650258-1-stephan.gerhold@kernkonzept.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/virtio_net.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 73aba760e10c6c..468d0ffc266b48 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2431,7 +2431,6 @@ static const struct ethtool_ops virtnet_ethtool_ops = { static void virtnet_freeze_down(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; - int i; /* Make sure no work handler is accessing the device */ flush_work(&vi->config_work); @@ -2439,14 +2438,8 @@ static void virtnet_freeze_down(struct virtio_device *vdev) netif_tx_lock_bh(vi->dev); netif_device_detach(vi->dev); netif_tx_unlock_bh(vi->dev); - cancel_delayed_work_sync(&vi->refill); - - if (netif_running(vi->dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) { - napi_disable(&vi->rq[i].napi); - virtnet_napi_tx_disable(&vi->sq[i].napi); - } - } + if (netif_running(vi->dev)) + virtnet_close(vi->dev); } static int init_vqs(struct virtnet_info *vi); @@ -2454,7 +2447,7 @@ static int init_vqs(struct virtnet_info *vi); static int virtnet_restore_up(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; - int err, i; + int err; err = init_vqs(vi); if (err) @@ -2463,15 +2456,9 @@ static int virtnet_restore_up(struct virtio_device *vdev) virtio_device_ready(vdev); if (netif_running(vi->dev)) { - for (i = 0; i < vi->curr_queue_pairs; i++) - if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); - - for (i = 0; i < vi->max_queue_pairs; i++) { - virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); - virtnet_napi_tx_enable(vi, vi->sq[i].vq, - &vi->sq[i].napi); - } + err = virtnet_open(vi->dev); + if (err) + return err; } netif_tx_lock_bh(vi->dev); From eb93999705650a86d4d039e3ff71f99bf1f40c59 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Jun 2022 12:13:52 -0700 Subject: [PATCH 0074/1056] Revert "net/tls: fix tls_sk_proto_close executed repeatedly" [ Upstream commit 1b205d948fbb06a7613d87dcea0ff5fd8a08ed91 ] This reverts commit 69135c572d1f84261a6de2a1268513a7e71753e2. This commit was just papering over the issue, ULP should not get ->update() called with its own sk_prot. Each ULP would need to add this check. Fixes: 69135c572d1f ("net/tls: fix tls_sk_proto_close executed repeatedly") Signed-off-by: Jakub Kicinski Reviewed-by: John Fastabend Link: https://lore.kernel.org/r/20220620191353.1184629-1-kuba@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/tls/tls_main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 057c1af6182a2a..9aac9c60d786db 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -790,9 +790,6 @@ static void tls_update(struct sock *sk, struct proto *p, { struct tls_context *ctx; - if (sk->sk_prot == p) - return; - ctx = tls_get_ctx(sk); if (likely(ctx)) { ctx->sk_write_space = write_space; From 72fa0f65b56605b8a9ae9fba2082f2123f7fe017 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Jun 2022 12:13:53 -0700 Subject: [PATCH 0075/1056] sock: redo the psock vs ULP protection check [ Upstream commit e34a07c0ae3906f97eb18df50902e2a01c1015b6 ] Commit 8a59f9d1e3d4 ("sock: Introduce sk->sk_prot->psock_update_sk_prot()") has moved the inet_csk_has_ulp(sk) check from sk_psock_init() to the new tcp_bpf_update_proto() function. I'm guessing that this was done to allow creating psocks for non-inet sockets. Unfortunately the destruction path for psock includes the ULP unwind, so we need to fail the sk_psock_init() itself. Otherwise if ULP is already present we'll notice that later, and call tcp_update_ulp() with the sk_proto of the ULP itself, which will most likely result in the ULP looping its callbacks. Fixes: 8a59f9d1e3d4 ("sock: Introduce sk->sk_prot->psock_update_sk_prot()") Signed-off-by: Jakub Kicinski Reviewed-by: John Fastabend Reviewed-by: Jakub Sitnicki Tested-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20220620191353.1184629-2-kuba@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- include/net/inet_sock.h | 5 +++++ net/core/skmsg.c | 5 +++++ net/ipv4/tcp_bpf.c | 3 --- net/tls/tls_main.c | 2 ++ 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 9e1111f5915bd0..d81b7f85819ed1 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -252,6 +252,11 @@ struct inet_sock { #define IP_CMSG_CHECKSUM BIT(7) #define IP_CMSG_RECVFRAGSIZE BIT(8) +static inline bool sk_is_inet(struct sock *sk) +{ + return sk->sk_family == AF_INET || sk->sk_family == AF_INET6; +} + /** * sk_to_full_sk - Access to a full socket * @sk: pointer to a socket diff --git a/net/core/skmsg.c b/net/core/skmsg.c index cc381165ea0809..ede0af308f404e 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -695,6 +695,11 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node) write_lock_bh(&sk->sk_callback_lock); + if (sk_is_inet(sk) && inet_csk_has_ulp(sk)) { + psock = ERR_PTR(-EINVAL); + goto out; + } + if (sk->sk_user_data) { psock = ERR_PTR(-EBUSY); goto out; diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 1cdcb4df0eb7e7..2c597a4e429aba 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -612,9 +612,6 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) return 0; } - if (inet_csk_has_ulp(sk)) - return -EINVAL; - if (sk->sk_family == AF_INET6) { if (tcp_bpf_assert_proto_ops(psock->sk_proto)) return -EINVAL; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 9aac9c60d786db..62b1c5e32bbd73 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -790,6 +790,8 @@ static void tls_update(struct sock *sk, struct proto *p, { struct tls_context *ctx; + WARN_ON_ONCE(sk->sk_prot == p); + ctx = tls_get_ctx(sk); if (likely(ctx)) { ctx->sk_write_space = write_space; From 1057d42602cb610b06e6ac0495483254e4812d09 Mon Sep 17 00:00:00 2001 From: Enzo Matsumiya Date: Fri, 5 Nov 2021 23:08:57 -0300 Subject: [PATCH 0076/1056] nvme-pci: add NO APST quirk for Kioxia device [ Upstream commit 5a6254d55e2a9f7919ead8580d7aa0c7a382b26a ] This particular Kioxia device times out and aborts I/O during any load, but it's more easily observable with discards (fstrim). The device gets to a state that is also not possible to use "nvme set-feature" to disable APST. Booting with nvme_core.default_ps_max_latency=0 solves the issue. We had a dozen or so of these devices behaving this same way in customer environments. Signed-off-by: Enzo Matsumiya Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9bc9f6d225bd06..3ef1f9112ee04c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2475,6 +2475,20 @@ static const struct nvme_core_quirk_entry core_quirks[] = { .vid = 0x14a4, .fr = "22301111", .quirks = NVME_QUIRK_SIMPLE_SUSPEND, + }, + { + /* + * This Kioxia CD6-V Series / HPE PE8030 device times out and + * aborts I/O during any load, but more easily reproducible + * with discards (fstrim). + * + * The device is left in a state where it is also not possible + * to use "nvme set-feature" to disable APST, but booting with + * nvme_core.default_ps_max_latency=0 works. + */ + .vid = 0x1e0f, + .mn = "KCD6XVUL6T40", + .quirks = NVME_QUIRK_NO_APST, } }; From e3ea126c513c93bc4af1d45044e34e3238ac0577 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jun 2022 10:29:42 +0200 Subject: [PATCH 0077/1056] nvme: move the Samsung X5 quirk entry to the core quirks [ Upstream commit e6487833182a8a0187f0292aca542fc163ccd03e ] This device shares the PCI ID with the Samsung 970 Evo Plus that does not need or want the quirks. Move the the quirk entry to the core table based on the model number instead. Fixes: bc360b0b1611 ("nvme-pci: add quirks for Samsung X5 SSDs") Signed-off-by: Christoph Hellwig Reviewed-by: Pankaj Raghav Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 14 ++++++++++++++ drivers/nvme/host/pci.c | 4 ---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3ef1f9112ee04c..19054b791c6727 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2489,6 +2489,20 @@ static const struct nvme_core_quirk_entry core_quirks[] = { .vid = 0x1e0f, .mn = "KCD6XVUL6T40", .quirks = NVME_QUIRK_NO_APST, + }, + { + /* + * The external Samsung X5 SSD fails initialization without a + * delay before checking if it is ready and has a whole set of + * other problems. To make this even more interesting, it + * shares the PCI ID with internal Samsung 970 Evo Plus that + * does not need or want these quirks. + */ + .vid = 0x144d, + .mn = "Samsung Portable SSD X5", + .quirks = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | + NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, } }; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3ddd24a42043d0..58b8461b2b0fb7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3380,10 +3380,6 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_128_BYTES_SQES | NVME_QUIRK_SHARED_TAGS | NVME_QUIRK_SKIP_CID_GEN }, - { PCI_DEVICE(0x144d, 0xa808), /* Samsung X5 */ - .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY| - NVME_QUIRK_NO_DEEPEST_PS | - NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { 0, } }; From 09dd5630fea6e103aeec77cf0581591957722aa9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Jun 2022 11:29:48 +0300 Subject: [PATCH 0078/1056] gpio: winbond: Fix error code in winbond_gpio_get() [ Upstream commit 9ca766eaea2e87b8b773bff04ee56c055cb76d4e ] This error path returns 1, but it should instead propagate the negative error code from winbond_sio_enter(). Fixes: a0d65009411c ("gpio: winbond: Add driver") Signed-off-by: Dan Carpenter Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-winbond.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-winbond.c b/drivers/gpio/gpio-winbond.c index 7f8f5b02e31d5c..4b61d975cc0ec2 100644 --- a/drivers/gpio/gpio-winbond.c +++ b/drivers/gpio/gpio-winbond.c @@ -385,12 +385,13 @@ static int winbond_gpio_get(struct gpio_chip *gc, unsigned int offset) unsigned long *base = gpiochip_get_data(gc); const struct winbond_gpio_info *info; bool val; + int ret; winbond_gpio_get_info(&offset, &info); - val = winbond_sio_enter(*base); - if (val) - return val; + ret = winbond_sio_enter(*base); + if (ret) + return ret; winbond_sio_select_logical(*base, info->dev); From ff3e50ca9250573d6dbe7fdbed32da7f21d15c3b Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 10 Jun 2022 15:19:00 +0200 Subject: [PATCH 0079/1056] s390/cpumf: Handle events cycles and instructions identical [ Upstream commit be857b7f77d130dbbd47c91fc35198b040f35865 ] Events CPU_CYCLES and INSTRUCTIONS can be submitted with two different perf_event attribute::type values: - PERF_TYPE_HARDWARE: when invoked via perf tool predefined events name cycles or cpu-cycles or instructions. - pmu->type: when invoked via perf tool event name cpu_cf/CPU_CYLCES/ or cpu_cf/INSTRUCTIONS/. This invocation also selects the PMU to which the event belongs. Handle both type of invocations identical for events CPU_CYLCES and INSTRUCTIONS. They address the same hardware. The result is different when event modifier exclude_kernel is also set. Invocation with event modifier for user space event counting fails. Output before: # perf stat -e cpum_cf/cpu_cycles/u -- true Performance counter stats for 'true': cpum_cf/cpu_cycles/u 0.000761033 seconds time elapsed 0.000076000 seconds user 0.000725000 seconds sys # Output after: # perf stat -e cpum_cf/cpu_cycles/u -- true Performance counter stats for 'true': 349,613 cpum_cf/cpu_cycles/u 0.000844143 seconds time elapsed 0.000079000 seconds user 0.000800000 seconds sys # Fixes: 6a82e23f45fe ("s390/cpumf: Adjust registration of s390 PMU device drivers") Signed-off-by: Thomas Richter Acked-by: Sumanth Korikkar [agordeev@linux.ibm.com corrected commit ID of Fixes commit] Signed-off-by: Alexander Gordeev Signed-off-by: Sasha Levin --- arch/s390/kernel/perf_cpum_cf.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index cceb8ec707e4bb..d2a2a18b55808c 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -516,6 +516,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) return err; } +/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different + * attribute::type values: + * - PERF_TYPE_HARDWARE: + * - pmu->type: + * Handle both type of invocations identical. They address the same hardware. + * The result is different when event modifiers exclude_kernel and/or + * exclude_user are also set. + */ +static int cpumf_pmu_event_type(struct perf_event *event) +{ + u64 ev = event->attr.config; + + if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev) + return PERF_TYPE_HARDWARE; + return PERF_TYPE_RAW; +} + static int cpumf_pmu_event_init(struct perf_event *event) { unsigned int type = event->attr.type; @@ -525,7 +545,7 @@ static int cpumf_pmu_event_init(struct perf_event *event) err = __hw_perf_event_init(event, type); else if (event->pmu->type == type) /* Registered as unknown PMU */ - err = __hw_perf_event_init(event, PERF_TYPE_RAW); + err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); else return -ENOENT; From 490dd2dd2a79cf1c98cf721af43e4657ce8d5f59 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Mon, 25 Apr 2022 16:41:00 +0800 Subject: [PATCH 0080/1056] iio: mma8452: fix probe fail when device tree compatible is used. [ Upstream commit fe18894930a025617114aa8ca0adbf94d5bffe89 ] Correct the logic for the probe. First check of_match_table, if not meet, then check i2c_driver.id_table. If both not meet, then return fail. Fixes: a47ac019e7e8 ("iio: mma8452: Fix probe failing when an i2c_device_id is used") Signed-off-by: Haibo Chen Link: https://lore.kernel.org/r/1650876060-17577-1-git-send-email-haibo.chen@nxp.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/accel/mma8452.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 21a99467f36467..40faf09f5d875f 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -1539,11 +1539,13 @@ static int mma8452_probe(struct i2c_client *client, mutex_init(&data->lock); data->chip_info = device_get_match_data(&client->dev); - if (!data->chip_info && id) { - data->chip_info = &mma_chip_info_table[id->driver_data]; - } else { - dev_err(&client->dev, "unknown device model\n"); - return -ENODEV; + if (!data->chip_info) { + if (id) { + data->chip_info = &mma_chip_info_table[id->driver_data]; + } else { + dev_err(&client->dev, "unknown device model\n"); + return -ENODEV; + } } data->vdd_reg = devm_regulator_get(&client->dev, "vdd"); From 466e15f845d7527af9fa380e5abe980c9d69dcf6 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 1 May 2022 21:50:29 +0200 Subject: [PATCH 0081/1056] iio: magnetometer: yas530: Fix memchr_inv() misuse [ Upstream commit bb52d3691db8cf24cea049235223f3599778f264 ] The call to check if the calibration is all zeroes is doing it wrong: memchr_inv() returns NULL if the the calibration contains all zeroes, but the check is for != NULL. Fix it up. It's probably not an urgent fix because the inner check for BIT(7) in data[13] will save us. But fix it. Fixes: de8860b1ed47 ("iio: magnetometer: Add driver for Yamaha YAS530") Reported-by: Jakob Hauser Cc: Andy Shevchenko Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220501195029.151852-1-linus.walleij@linaro.org Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/magnetometer/yamaha-yas530.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/magnetometer/yamaha-yas530.c b/drivers/iio/magnetometer/yamaha-yas530.c index 9ff7b0e56cf67a..b2bc637150bfa8 100644 --- a/drivers/iio/magnetometer/yamaha-yas530.c +++ b/drivers/iio/magnetometer/yamaha-yas530.c @@ -639,7 +639,7 @@ static int yas532_get_calibration_data(struct yas5xx *yas5xx) dev_dbg(yas5xx->dev, "calibration data: %*ph\n", 14, data); /* Sanity check, is this all zeroes? */ - if (memchr_inv(data, 0x00, 13)) { + if (memchr_inv(data, 0x00, 13) == NULL) { if (!(data[13] & BIT(7))) dev_warn(yas5xx->dev, "calibration is blank!\n"); } From 288f30e17513e2af747b7947973020bb2efabee2 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 30 May 2022 11:50:26 +0300 Subject: [PATCH 0082/1056] iio: adc: vf610: fix conversion mode sysfs node name [ Upstream commit f1a633b15cd5371a2a83f02c513984e51132dd68 ] The documentation missed the "in_" prefix for this IIO_SHARED_BY_DIR entry. Fixes: bf04c1a367e3 ("iio: adc: vf610: implement configurable conversion modes") Signed-off-by: Baruch Siach Acked-by: Haibo Chen Link: https://lore.kernel.org/r/560dc93fafe5ef7e9a409885fd20b6beac3973d8.1653900626.git.baruch@tkos.co.il Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- Documentation/ABI/testing/sysfs-bus-iio-vf610 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-bus-iio-vf610 b/Documentation/ABI/testing/sysfs-bus-iio-vf610 index 308a6756d3bf3f..491ead80448881 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-vf610 +++ b/Documentation/ABI/testing/sysfs-bus-iio-vf610 @@ -1,4 +1,4 @@ -What: /sys/bus/iio/devices/iio:deviceX/conversion_mode +What: /sys/bus/iio/devices/iio:deviceX/in_conversion_mode KernelVersion: 4.2 Contact: linux-iio@vger.kernel.org Description: From 9509a175a560bb85605f22b0ff73c9bdb2ab63a2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 20 Jun 2022 13:43:16 +0300 Subject: [PATCH 0083/1056] usb: typec: wcove: Drop wrong dependency to INTEL_SOC_PMIC [ Upstream commit 9ef165406308515dcf2e3f6e97b39a1c56d86db5 ] Intel SoC PMIC is a generic name for all PMICs that are used on Intel platforms. In particular, INTEL_SOC_PMIC kernel configuration option refers to Crystal Cove PMIC, which has never been a part of any Intel Broxton hardware. Drop wrong dependency from Kconfig. Note, the correct dependency is satisfied via ACPI PMIC OpRegion driver, which the Type-C depends on. Fixes: d2061f9cc32d ("usb: typec: add driver for Intel Whiskey Cove PMIC USB Type-C PHY") Reported-by: Hans de Goede Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Reviewed-by: Hans de Goede Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220620104316.57592-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/typec/tcpm/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/Kconfig b/drivers/usb/typec/tcpm/Kconfig index 557f392fe24da9..073fd2ea5e0bbd 100644 --- a/drivers/usb/typec/tcpm/Kconfig +++ b/drivers/usb/typec/tcpm/Kconfig @@ -56,7 +56,6 @@ config TYPEC_WCOVE tristate "Intel WhiskeyCove PMIC USB Type-C PHY driver" depends on ACPI depends on MFD_INTEL_PMC_BXT - depends on INTEL_SOC_PMIC depends on BXT_WC_PMIC_OPREGION help This driver adds support for USB Type-C on Intel Broxton platforms From c1c78d4d9b0aa155574d7ec09b0408e28d1717f7 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 23 Jun 2022 14:19:43 +0300 Subject: [PATCH 0084/1056] xhci: turn off port power in shutdown commit 83810f84ecf11dfc5a9414a8b762c3501b328185 upstream. If ports are not turned off in shutdown then runtime suspended self-powered USB devices may survive in U3 link state over S5. During subsequent boot, if firmware sends an IPC command to program the port in DISCONNECT state, it will time out, causing significant delay in the boot time. Turning off roothub port power is also recommended in xhci specification 4.19.4 "Port Power" in the additional note. Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220623111945.1557702-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 2 +- drivers/usb/host/xhci.c | 15 +++++++++++++-- drivers/usb/host/xhci.h | 2 ++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index f65f1ba2b59292..fc322a9526c8c6 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -652,7 +652,7 @@ struct xhci_hub *xhci_get_rhub(struct usb_hcd *hcd) * It will release and re-aquire the lock while calling ACPI * method. */ -static void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, +void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, u16 index, bool on, unsigned long *flags) __must_hold(&xhci->lock) { diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 90f5a3ce7c3489..94fe7d64e762b2 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -774,6 +774,8 @@ static void xhci_stop(struct usb_hcd *hcd) void xhci_shutdown(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); + unsigned long flags; + int i; if (xhci->quirks & XHCI_SPURIOUS_REBOOT) usb_disable_xhci_ports(to_pci_dev(hcd->self.sysdev)); @@ -789,12 +791,21 @@ void xhci_shutdown(struct usb_hcd *hcd) del_timer_sync(&xhci->shared_hcd->rh_timer); } - spin_lock_irq(&xhci->lock); + spin_lock_irqsave(&xhci->lock, flags); xhci_halt(xhci); + + /* Power off USB2 ports*/ + for (i = 0; i < xhci->usb2_rhub.num_ports; i++) + xhci_set_port_power(xhci, xhci->main_hcd, i, false, &flags); + + /* Power off USB3 ports*/ + for (i = 0; i < xhci->usb3_rhub.num_ports; i++) + xhci_set_port_power(xhci, xhci->shared_hcd, i, false, &flags); + /* Workaround for spurious wakeups at shutdown with HSW */ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) xhci_reset(xhci, XHCI_RESET_SHORT_USEC); - spin_unlock_irq(&xhci->lock); + spin_unlock_irqrestore(&xhci->lock, flags); xhci_cleanup_msix(xhci); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index bc0789229527ff..79fa34f1e31c49 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -2174,6 +2174,8 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, int xhci_hub_status_data(struct usb_hcd *hcd, char *buf); int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1); struct xhci_hub *xhci_get_rhub(struct usb_hcd *hcd); +void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, u16 index, + bool on, unsigned long *flags); void xhci_hc_died(struct xhci_hcd *xhci); From cfa16dd21be06b319f8524b44a5e503b2c451b1b Mon Sep 17 00:00:00 2001 From: Tanveer Alam Date: Thu, 23 Jun 2022 14:19:44 +0300 Subject: [PATCH 0085/1056] xhci-pci: Allow host runtime PM as default for Intel Raptor Lake xHCI commit 7516da47a349e74de623243a27f9b8a91446bf4f upstream. In the same way as Intel Alder Lake TCSS (Type-C Subsystem) the Raptor Lake TCSS xHCI needs to be runtime suspended whenever possible to allow the TCSS hardware block to enter D3cold and thus save energy. Cc: stable@kernel.org Signed-off-by: Tanveer Alam Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220623111945.1557702-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 7d13ddff6b33f1..c129b08146a633 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -61,6 +61,7 @@ #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI 0x461e #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI 0x464e #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed +#define PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI 0xa71e #define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 @@ -270,7 +271,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI)) + pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI)) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (pdev->vendor == PCI_VENDOR_ID_ETRON && From da57f113e817fed31a53838fa7279d6b59dbedaf Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Thu, 23 Jun 2022 14:19:45 +0300 Subject: [PATCH 0086/1056] xhci-pci: Allow host runtime PM as default for Intel Meteor Lake xHCI commit 8ffdc53a60049f3930afe161dc51c67959c8d83d upstream. Meteor Lake TCSS(Type-C Subsystem) xHCI needs to be runtime suspended whenever possible to allow the TCSS hardware block to enter D3cold and thus save energy. Cc: stable@kernel.org Signed-off-by: Utkarsh Patel Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220623111945.1557702-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index c129b08146a633..352626f9e451bc 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -62,6 +62,7 @@ #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI 0x464e #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed #define PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI 0xa71e +#define PCI_DEVICE_ID_INTEL_METEOR_LAKE_XHCI 0x7ec0 #define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 @@ -272,7 +273,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI)) + pdev->device == PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_METEOR_LAKE_XHCI)) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (pdev->vendor == PCI_VENDOR_ID_ETRON && From 61c3a21ba6f636baeee47a7509154963b23ef999 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 13 Jun 2022 10:17:03 -0400 Subject: [PATCH 0087/1056] usb: gadget: Fix non-unique driver names in raw-gadget driver commit f2d8c2606825317b77db1f9ba0fc26ef26160b30 upstream. In a report for a separate bug (which has already been fixed by commit 5f0b5f4d50fa "usb: gadget: fix race when gadget driver register via ioctl") in the raw-gadget driver, the syzbot console log included error messages caused by attempted registration of a new driver with the same name as an existing driver: > kobject_add_internal failed for raw-gadget with -EEXIST, don't try to register things with the same name in the same directory. > UDC core: USB Raw Gadget: driver registration failed: -17 > misc raw-gadget: fail, usb_gadget_register_driver returned -17 These errors arise because raw_gadget.c registers a separate UDC driver for each of the UDC instances it creates, but these drivers all have the same name: "raw-gadget". Until recently this wasn't a problem, but when the "gadget" bus was added and UDC drivers were registered on this bus, it became possible for name conflicts to cause the registrations to fail. The reason is simply that the bus code in the driver core uses the driver name as a sysfs directory name (e.g., /sys/bus/gadget/drivers/raw-gadget/), and you can't create two directories with the same pathname. To fix this problem, the driver names used by raw-gadget are made distinct by appending a unique ID number: "raw-gadget.N", with a different value of N for each driver instance. And to avoid the proliferation of error handling code in the raw_ioctl_init() routine, the error return paths are refactored into the common pattern (goto statements leading to cleanup code at the end of the routine). Link: https://lore.kernel.org/all/0000000000008c664105dffae2eb@google.com/ Fixes: fc274c1e9973 "USB: gadget: Add a new bus for gadgets" CC: Andrey Konovalov CC: Reported-and-tested-by: syzbot+02b16343704b3af1667e@syzkaller.appspotmail.com Reviewed-by: Andrey Konovalov Acked-by: Hillf Danton Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/YqdG32w+3h8c1s7z@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/raw_gadget.c | 62 +++++++++++++++++++------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index 3427ce37a5c5bd..cc8de19ab91661 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,9 @@ MODULE_LICENSE("GPL"); /*----------------------------------------------------------------------*/ +static DEFINE_IDA(driver_id_numbers); +#define DRIVER_DRIVER_NAME_LENGTH_MAX 32 + #define RAW_EVENT_QUEUE_SIZE 16 struct raw_event_queue { @@ -161,6 +165,9 @@ struct raw_dev { /* Reference to misc device: */ struct device *dev; + /* Make driver names unique */ + int driver_id_number; + /* Protected by lock: */ enum dev_state state; bool gadget_registered; @@ -189,6 +196,7 @@ static struct raw_dev *dev_new(void) spin_lock_init(&dev->lock); init_completion(&dev->ep0_done); raw_event_queue_init(&dev->queue); + dev->driver_id_number = -1; return dev; } @@ -199,6 +207,9 @@ static void dev_free(struct kref *kref) kfree(dev->udc_name); kfree(dev->driver.udc_name); + kfree(dev->driver.driver.name); + if (dev->driver_id_number >= 0) + ida_free(&driver_id_numbers, dev->driver_id_number); if (dev->req) { if (dev->ep0_urb_queued) usb_ep_dequeue(dev->gadget->ep0, dev->req); @@ -422,6 +433,7 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) struct usb_raw_init arg; char *udc_driver_name; char *udc_device_name; + char *driver_driver_name; unsigned long flags; if (copy_from_user(&arg, (void __user *)value, sizeof(arg))) @@ -440,36 +452,44 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) return -EINVAL; } + ret = ida_alloc(&driver_id_numbers, GFP_KERNEL); + if (ret < 0) + return ret; + dev->driver_id_number = ret; + + driver_driver_name = kmalloc(DRIVER_DRIVER_NAME_LENGTH_MAX, GFP_KERNEL); + if (!driver_driver_name) { + ret = -ENOMEM; + goto out_free_driver_id_number; + } + snprintf(driver_driver_name, DRIVER_DRIVER_NAME_LENGTH_MAX, + DRIVER_NAME ".%d", dev->driver_id_number); + udc_driver_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL); - if (!udc_driver_name) - return -ENOMEM; + if (!udc_driver_name) { + ret = -ENOMEM; + goto out_free_driver_driver_name; + } ret = strscpy(udc_driver_name, &arg.driver_name[0], UDC_NAME_LENGTH_MAX); - if (ret < 0) { - kfree(udc_driver_name); - return ret; - } + if (ret < 0) + goto out_free_udc_driver_name; ret = 0; udc_device_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL); if (!udc_device_name) { - kfree(udc_driver_name); - return -ENOMEM; + ret = -ENOMEM; + goto out_free_udc_driver_name; } ret = strscpy(udc_device_name, &arg.device_name[0], UDC_NAME_LENGTH_MAX); - if (ret < 0) { - kfree(udc_driver_name); - kfree(udc_device_name); - return ret; - } + if (ret < 0) + goto out_free_udc_device_name; ret = 0; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_OPENED) { dev_dbg(dev->dev, "fail, device is not opened\n"); - kfree(udc_driver_name); - kfree(udc_device_name); ret = -EINVAL; goto out_unlock; } @@ -484,14 +504,24 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) dev->driver.suspend = gadget_suspend; dev->driver.resume = gadget_resume; dev->driver.reset = gadget_reset; - dev->driver.driver.name = DRIVER_NAME; + dev->driver.driver.name = driver_driver_name; dev->driver.udc_name = udc_device_name; dev->driver.match_existing_only = 1; dev->state = STATE_DEV_INITIALIZED; + spin_unlock_irqrestore(&dev->lock, flags); + return ret; out_unlock: spin_unlock_irqrestore(&dev->lock, flags); +out_free_udc_device_name: + kfree(udc_device_name); +out_free_udc_driver_name: + kfree(udc_driver_name); +out_free_driver_driver_name: + kfree(driver_driver_name); +out_free_driver_id_number: + ida_free(&driver_id_numbers, dev->driver_id_number); return ret; } From 07f1d9a6b75d124715f3c3528932b601bc54f305 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 22 Jun 2022 10:46:31 -0400 Subject: [PATCH 0088/1056] USB: gadget: Fix double-free bug in raw_gadget driver commit 90bc2af24638659da56397ff835f3c95a948f991 upstream. Re-reading a recently merged fix to the raw_gadget driver showed that it inadvertently introduced a double-free bug in a failure pathway. If raw_ioctl_init() encounters an error after the driver ID number has been allocated, it deallocates the ID number before returning. But when dev_free() runs later on, it will then try to deallocate the ID number a second time. Closely related to this issue is another error in the recent fix: The ID number is stored in the raw_dev structure before the code checks to see whether the structure has already been initialized, in which case the new ID number would overwrite the earlier value. The solution to both bugs is to keep the new ID number in a local variable, and store it in the raw_dev structure only after the check for prior initialization. No errors can occur after that point, so the double-free will never happen. Fixes: f2d8c2606825 ("usb: gadget: Fix non-unique driver names in raw-gadget driver") CC: Andrey Konovalov CC: Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/YrMrRw5AyIZghN0v@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/raw_gadget.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index cc8de19ab91661..2869bda642292d 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -430,6 +430,7 @@ static int raw_release(struct inode *inode, struct file *fd) static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) { int ret = 0; + int driver_id_number; struct usb_raw_init arg; char *udc_driver_name; char *udc_device_name; @@ -452,10 +453,9 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) return -EINVAL; } - ret = ida_alloc(&driver_id_numbers, GFP_KERNEL); - if (ret < 0) - return ret; - dev->driver_id_number = ret; + driver_id_number = ida_alloc(&driver_id_numbers, GFP_KERNEL); + if (driver_id_number < 0) + return driver_id_number; driver_driver_name = kmalloc(DRIVER_DRIVER_NAME_LENGTH_MAX, GFP_KERNEL); if (!driver_driver_name) { @@ -463,7 +463,7 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) goto out_free_driver_id_number; } snprintf(driver_driver_name, DRIVER_DRIVER_NAME_LENGTH_MAX, - DRIVER_NAME ".%d", dev->driver_id_number); + DRIVER_NAME ".%d", driver_id_number); udc_driver_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL); if (!udc_driver_name) { @@ -507,6 +507,7 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) dev->driver.driver.name = driver_driver_name; dev->driver.udc_name = udc_device_name; dev->driver.match_existing_only = 1; + dev->driver_id_number = driver_id_number; dev->state = STATE_DEV_INITIALIZED; spin_unlock_irqrestore(&dev->lock, flags); @@ -521,7 +522,7 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) out_free_driver_driver_name: kfree(driver_driver_name); out_free_driver_id_number: - ida_free(&driver_id_numbers, dev->driver_id_number); + ida_free(&driver_id_numbers, driver_id_number); return ret; } From 308df8d4e41b44722525aea81e5de9d976ed9d9f Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 23 Jun 2022 11:02:42 +0800 Subject: [PATCH 0089/1056] usb: chipidea: udc: check request status before setting device address commit b24346a240b36cfc4df194d145463874985aa29b upstream. The complete() function may be called even though request is not completed. In this case, it's necessary to check request status so as not to set device address wrongly. Fixes: 10775eb17bee ("usb: chipidea: udc: update gadget states according to ch9") cc: Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20220623030242.41796-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/udc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 8834ca6137219e..aacc37736db6ee 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1040,6 +1040,9 @@ isr_setup_status_complete(struct usb_ep *ep, struct usb_request *req) struct ci_hdrc *ci = req->context; unsigned long flags; + if (req->status < 0) + return; + if (ci->setaddr) { hw_usb_set_address(ci, ci->address); ci->setaddr = false; From 9a0b865d8b4ce8481bfa783ac41a841c6d0d5731 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 15 Jun 2022 15:54:02 +0200 Subject: [PATCH 0090/1056] dt-bindings: usb: ohci: Increase the number of PHYs commit 0f074c1c95ea496dc91279b6c4b9845a337517fa upstream. "make dtbs_check": arch/arm/boot/dts/r8a77470-iwg23s-sbc.dtb: usb@ee080000: phys: [[17, 0], [31]] is too long From schema: Documentation/devicetree/bindings/usb/generic-ohci.yaml arch/arm/boot/dts/r8a77470-iwg23s-sbc.dtb: usb@ee0c0000: phys: [[17, 1], [33], [21, 0]] is too long From schema: Documentation/devicetree/bindings/usb/generic-ohci.yaml Some USB OHCI controllers (e.g. on the Renesas RZ/G1C SoC) have multiple PHYs. Increase the maximum number of PHYs to 3, which is sufficient for now. Fixes: 0499220d6dadafa5 ("dt-bindings: Add missing array size constraints") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/0112f9c8881513cb33bf7b66bc743dd08b35a2f5.1655301203.git.geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/generic-ohci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/generic-ohci.yaml b/Documentation/devicetree/bindings/usb/generic-ohci.yaml index acbf94fa5f74a0..d5fd3aa53ed291 100644 --- a/Documentation/devicetree/bindings/usb/generic-ohci.yaml +++ b/Documentation/devicetree/bindings/usb/generic-ohci.yaml @@ -102,7 +102,8 @@ properties: Overrides the detected port count phys: - maxItems: 1 + minItems: 1 + maxItems: 3 phy-names: const: usb From 46336a59a4a7dc01e5985e3b825d410de657caa0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 15 Jun 2022 15:53:09 +0200 Subject: [PATCH 0091/1056] dt-bindings: usb: ehci: Increase the number of PHYs commit 9faa1c8f92f33daad9db96944139de225cefa199 upstream. "make dtbs_check": arch/arm/boot/dts/r8a77470-iwg23s-sbc.dtb: usb@ee080100: phys: [[17, 0], [31]] is too long From schema: Documentation/devicetree/bindings/usb/generic-ehci.yaml arch/arm/boot/dts/r8a77470-iwg23s-sbc.dtb: usb@ee0c0100: phys: [[17, 1], [33], [21, 0]] is too long From schema: Documentation/devicetree/bindings/usb/generic-ehci.yaml Some USB EHCI controllers (e.g. on the Renesas RZ/G1C SoC) have multiple PHYs. Increase the maximum number of PHYs to 3, which is sufficient for now. Fixes: 0499220d6dadafa5 ("dt-bindings: Add missing array size constraints") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/c5d19e2f9714f43effd90208798fc1936098078f.1655301043.git.geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/generic-ehci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/generic-ehci.yaml b/Documentation/devicetree/bindings/usb/generic-ehci.yaml index 8913497624de28..cb5da1df8d4052 100644 --- a/Documentation/devicetree/bindings/usb/generic-ehci.yaml +++ b/Documentation/devicetree/bindings/usb/generic-ehci.yaml @@ -135,7 +135,8 @@ properties: Phandle of a companion. phys: - maxItems: 1 + minItems: 1 + maxItems: 3 phy-names: const: usb From 1238f580cd812c588d5ad67067795692d7a1828f Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Wed, 8 Jun 2022 22:39:36 -0400 Subject: [PATCH 0092/1056] btrfs: don't set lock_owner when locking extent buffer for reading commit 97e86631bccddfbbe0c13f9a9605cdef11d31296 upstream. In 196d59ab9ccc "btrfs: switch extent buffer tree lock to rw_semaphore" the functions for tree read locking were rewritten, and in the process the read lock functions started setting eb->lock_owner = current->pid. Previously lock_owner was only set in tree write lock functions. Read locks are shared, so they don't have exclusive ownership of the underlying object, so setting lock_owner to any single value for a read lock makes no sense. It's mostly harmless because write locks and read locks are mutually exclusive, and none of the existing code in btrfs (btrfs_init_new_buffer and print_eb_refs_lock) cares what nonsense is written in lock_owner when no writer is holding the lock. KCSAN does care, and will complain about the data race incessantly. Remove the assignments in the read lock functions because they're useless noise. Fixes: 196d59ab9ccc ("btrfs: switch extent buffer tree lock to rw_semaphore") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Nikolay Borisov Reviewed-by: Filipe Manana Signed-off-by: Zygo Blaxell Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/locking.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 313d9d685adb7a..33461b4f9c8b5c 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -45,7 +45,6 @@ void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting ne start_ns = ktime_get_ns(); down_read_nested(&eb->lock, nest); - eb->lock_owner = current->pid; trace_btrfs_tree_read_lock(eb, start_ns); } @@ -62,7 +61,6 @@ void btrfs_tree_read_lock(struct extent_buffer *eb) int btrfs_try_tree_read_lock(struct extent_buffer *eb) { if (down_read_trylock(&eb->lock)) { - eb->lock_owner = current->pid; trace_btrfs_try_tree_read_lock(eb); return 1; } @@ -90,7 +88,6 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) void btrfs_tree_read_unlock(struct extent_buffer *eb) { trace_btrfs_tree_read_unlock(eb); - eb->lock_owner = 0; up_read(&eb->lock); } From d98b5032c9d046c946099fa09a01657f194796ad Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 13 Jun 2022 15:09:49 -0400 Subject: [PATCH 0093/1056] btrfs: fix deadlock with fsync+fiemap+transaction commit commit bf7ba8ee759b7b7a34787ddd8dc3f190a3d7fa24 upstream. We are hitting the following deadlock in production occasionally Task 1 Task 2 Task 3 Task 4 Task 5 fsync(A) start trans start commit falloc(A) lock 5m-10m start trans wait for commit fiemap(A) lock 0-10m wait for 5m-10m (have 0-5m locked) have btrfs_need_log_full_commit !full_sync wait_ordered_extents finish_ordered_io(A) lock 0-5m DEADLOCK We have an existing dependency of file extent lock -> transaction. However in fsync if we tried to do the fast logging, but then had to fall back to committing the transaction, we will be forced to call btrfs_wait_ordered_range() to make sure all of our extents are updated. This creates a dependency of transaction -> file extent lock, because btrfs_finish_ordered_io() will need to take the file extent lock in order to run the ordered extents. Fix this by stopping the transaction if we have to do the full commit and we attempted to do the fast logging. Then attach to the transaction and commit it if we need to. CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/file.c | 67 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ff578c934bbcfa..a06c8366a8f4e1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2337,25 +2337,62 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); - if (ret != BTRFS_NO_LOG_SYNC) { + if (ret == BTRFS_NO_LOG_SYNC) { + ret = btrfs_end_transaction(trans); + goto out; + } + + /* We successfully logged the inode, attempt to sync the log. */ + if (!ret) { + ret = btrfs_sync_log(trans, root, &ctx); if (!ret) { - ret = btrfs_sync_log(trans, root, &ctx); - if (!ret) { - ret = btrfs_end_transaction(trans); - goto out; - } - } - if (!full_sync) { - ret = btrfs_wait_ordered_range(inode, start, len); - if (ret) { - btrfs_end_transaction(trans); - goto out; - } + ret = btrfs_end_transaction(trans); + goto out; } - ret = btrfs_commit_transaction(trans); - } else { + } + + /* + * At this point we need to commit the transaction because we had + * btrfs_need_log_full_commit() or some other error. + * + * If we didn't do a full sync we have to stop the trans handle, wait on + * the ordered extents, start it again and commit the transaction. If + * we attempt to wait on the ordered extents here we could deadlock with + * something like fallocate() that is holding the extent lock trying to + * start a transaction while some other thread is trying to commit the + * transaction while we (fsync) are currently holding the transaction + * open. + */ + if (!full_sync) { ret = btrfs_end_transaction(trans); + if (ret) + goto out; + ret = btrfs_wait_ordered_range(inode, start, len); + if (ret) + goto out; + + /* + * This is safe to use here because we're only interested in + * making sure the transaction that had the ordered extents is + * committed. We aren't waiting on anything past this point, + * we're purely getting the transaction and committing it. + */ + trans = btrfs_attach_transaction_barrier(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + + /* + * We committed the transaction and there's no currently + * running transaction, this means everything we care + * about made it to disk and we are done. + */ + if (ret == -ENOENT) + ret = 0; + goto out; + } } + + ret = btrfs_commit_transaction(trans); out: ASSERT(list_empty(&ctx.list)); err = file_check_and_advance_wb_err(file); From f650029de357bee182233f635589e8afea90ba44 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 31 May 2022 18:27:09 -0700 Subject: [PATCH 0094/1056] f2fs: attach inline_data after setting compression commit 4cde00d50707c2ef6647b9b96b2cb40b6eb24397 upstream. This fixes the below corruption. [345393.335389] F2FS-fs (vdb): sanity_check_inode: inode (ino=6d0, mode=33206) should not have inline_data, run fsck to fix Cc: Fixes: 677a82b44ebf ("f2fs: fix to do sanity check for inline inode") Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/namei.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e4b25ef871b337..7a86a8dcf4f1cf 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -91,8 +91,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (test_opt(sbi, INLINE_XATTR)) set_inode_flag(inode, FI_INLINE_XATTR); - if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) - set_inode_flag(inode, FI_INLINE_DATA); if (f2fs_may_inline_dentry(inode)) set_inode_flag(inode, FI_INLINE_DENTRY); @@ -109,10 +107,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) f2fs_init_extent_tree(inode, NULL); - stat_inc_inline_xattr(inode); - stat_inc_inline_inode(inode); - stat_inc_inline_dir(inode); - F2FS_I(inode)->i_flags = f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); @@ -129,6 +123,14 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) set_compress_context(inode); } + /* Should enable inline_data after compression set */ + if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) + set_inode_flag(inode, FI_INLINE_DATA); + + stat_inc_inline_xattr(inode); + stat_inc_inline_inode(inode); + stat_inc_inline_dir(inode); + f2fs_set_inode_flags(inode); trace_f2fs_new_inode(inode, 0); @@ -327,6 +329,9 @@ static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode, if (!is_extension_exist(name, ext[i], false)) continue; + /* Do not use inline_data with compression */ + stat_dec_inline_inode(inode); + clear_inode_flag(inode, FI_INLINE_DATA); set_compress_context(inode); return; } From 974e69beebb3ca6bf51b1a7734fbc8d67c228f10 Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Tue, 24 May 2022 18:14:46 +0000 Subject: [PATCH 0095/1056] iio:humidity:hts221: rearrange iio trigger get and register commit 10b9c2c33ac706face458feab8965f11743c98c0 upstream. IIO trigger interface function iio_trigger_get() should be called after iio_trigger_register() (or its devm analogue) strictly, because of iio_trigger_get() acquires module refcnt based on the trigger->owner pointer, which is initialized inside iio_trigger_register() to THIS_MODULE. If this call order is wrong, the next iio_trigger_put() (from sysfs callback or "delete module" path) will dereference "default" module refcnt, which is incorrect behaviour. Fixes: e4a70e3e7d84 ("iio: humidity: add support to hts221 rh/temp combo device") Signed-off-by: Dmitry Rokosov Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220524181150.9240-6-ddrokosov@sberdevices.ru Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/humidity/hts221_buffer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iio/humidity/hts221_buffer.c b/drivers/iio/humidity/hts221_buffer.c index f29692b9d2db0e..66b32413cf5e26 100644 --- a/drivers/iio/humidity/hts221_buffer.c +++ b/drivers/iio/humidity/hts221_buffer.c @@ -135,9 +135,12 @@ int hts221_allocate_trigger(struct iio_dev *iio_dev) iio_trigger_set_drvdata(hw->trig, iio_dev); hw->trig->ops = &hts221_trigger_ops; + + err = devm_iio_trigger_register(hw->dev, hw->trig); + iio_dev->trig = iio_trigger_get(hw->trig); - return devm_iio_trigger_register(hw->dev, hw->trig); + return err; } static int hts221_buffer_preenable(struct iio_dev *iio_dev) From a1356318042ed84ada9de7bcc3887562e78e729c Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Tue, 24 May 2022 18:14:45 +0000 Subject: [PATCH 0096/1056] iio:chemical:ccs811: rearrange iio trigger get and register commit d710359c0b445e8c03e24f19ae2fb79ce7282260 upstream. IIO trigger interface function iio_trigger_get() should be called after iio_trigger_register() (or its devm analogue) strictly, because of iio_trigger_get() acquires module refcnt based on the trigger->owner pointer, which is initialized inside iio_trigger_register() to THIS_MODULE. If this call order is wrong, the next iio_trigger_put() (from sysfs callback or "delete module" path) will dereference "default" module refcnt, which is incorrect behaviour. Fixes: f1f065d7ac30 ("iio: chemical: ccs811: Add support for data ready trigger") Signed-off-by: Dmitry Rokosov Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220524181150.9240-5-ddrokosov@sberdevices.ru Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/chemical/ccs811.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/chemical/ccs811.c b/drivers/iio/chemical/ccs811.c index 847194fa1e4645..80ef1aa9aae3b0 100644 --- a/drivers/iio/chemical/ccs811.c +++ b/drivers/iio/chemical/ccs811.c @@ -499,11 +499,11 @@ static int ccs811_probe(struct i2c_client *client, data->drdy_trig->ops = &ccs811_trigger_ops; iio_trigger_set_drvdata(data->drdy_trig, indio_dev); - indio_dev->trig = data->drdy_trig; - iio_trigger_get(indio_dev->trig); ret = iio_trigger_register(data->drdy_trig); if (ret) goto err_poweroff; + + indio_dev->trig = iio_trigger_get(data->drdy_trig); } ret = iio_triggered_buffer_setup(indio_dev, NULL, From 240fb3913f183eb5c0fc45d86b51f1c5e9754ed2 Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Tue, 24 May 2022 18:14:42 +0000 Subject: [PATCH 0097/1056] iio:accel:kxcjk-1013: rearrange iio trigger get and register commit ed302925d708f2f97ae5e9fd6c56c16bb34f6629 upstream. IIO trigger interface function iio_trigger_get() should be called after iio_trigger_register() (or its devm analogue) strictly, because of iio_trigger_get() acquires module refcnt based on the trigger->owner pointer, which is initialized inside iio_trigger_register() to THIS_MODULE. If this call order is wrong, the next iio_trigger_put() (from sysfs callback or "delete module" path) will dereference "default" module refcnt, which is incorrect behaviour. Fixes: c1288b833881 ("iio: accel: kxcjk-1013: Increment ref counter for indio_dev->trig") Signed-off-by: Dmitry Rokosov Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220524181150.9240-3-ddrokosov@sberdevices.ru Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/kxcjk-1013.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index ba6c8ca488b1ae..594a383169c75c 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1553,12 +1553,12 @@ static int kxcjk1013_probe(struct i2c_client *client, data->dready_trig->ops = &kxcjk1013_trigger_ops; iio_trigger_set_drvdata(data->dready_trig, indio_dev); - indio_dev->trig = data->dready_trig; - iio_trigger_get(indio_dev->trig); ret = iio_trigger_register(data->dready_trig); if (ret) goto err_poweroff; + indio_dev->trig = iio_trigger_get(data->dready_trig); + data->motion_trig->ops = &kxcjk1013_trigger_ops; iio_trigger_set_drvdata(data->motion_trig, indio_dev); ret = iio_trigger_register(data->motion_trig); From 3357fb9da21aefdab80a980bb506da6caf20ae7d Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Tue, 24 May 2022 18:14:39 +0000 Subject: [PATCH 0098/1056] iio:accel:bma180: rearrange iio trigger get and register commit e5f3205b04d7f95a2ef43bce4b454a7f264d6923 upstream. IIO trigger interface function iio_trigger_get() should be called after iio_trigger_register() (or its devm analogue) strictly, because of iio_trigger_get() acquires module refcnt based on the trigger->owner pointer, which is initialized inside iio_trigger_register() to THIS_MODULE. If this call order is wrong, the next iio_trigger_put() (from sysfs callback or "delete module" path) will dereference "default" module refcnt, which is incorrect behaviour. Fixes: 0668a4e4d297 ("iio: accel: bma180: Fix indio_dev->trig assignment") Signed-off-by: Dmitry Rokosov Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220524181150.9240-2-ddrokosov@sberdevices.ru Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/bma180.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/accel/bma180.c b/drivers/iio/accel/bma180.c index 2edfcb4819b7db..3a1f47c7288ff3 100644 --- a/drivers/iio/accel/bma180.c +++ b/drivers/iio/accel/bma180.c @@ -1006,11 +1006,12 @@ static int bma180_probe(struct i2c_client *client, data->trig->ops = &bma180_trigger_ops; iio_trigger_set_drvdata(data->trig, indio_dev); - indio_dev->trig = iio_trigger_get(data->trig); ret = iio_trigger_register(data->trig); if (ret) goto err_trigger_free; + + indio_dev->trig = iio_trigger_get(data->trig); } ret = iio_triggered_buffer_setup(indio_dev, NULL, From cb0d87f2519dc309b58c3fe3d6239441a4b5ce74 Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Tue, 24 May 2022 18:14:43 +0000 Subject: [PATCH 0099/1056] iio:accel:mxc4005: rearrange iio trigger get and register commit 9354c224c9b4f55847a0de3e968cba2ebf15af3b upstream. IIO trigger interface function iio_trigger_get() should be called after iio_trigger_register() (or its devm analogue) strictly, because of iio_trigger_get() acquires module refcnt based on the trigger->owner pointer, which is initialized inside iio_trigger_register() to THIS_MODULE. If this call order is wrong, the next iio_trigger_put() (from sysfs callback or "delete module" path) will dereference "default" module refcnt, which is incorrect behaviour. Fixes: 47196620c82f ("iio: mxc4005: add data ready trigger for mxc4005") Signed-off-by: Dmitry Rokosov Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220524181150.9240-4-ddrokosov@sberdevices.ru Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/mxc4005.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/mxc4005.c b/drivers/iio/accel/mxc4005.c index b3afbf06491526..df600d2917c0ad 100644 --- a/drivers/iio/accel/mxc4005.c +++ b/drivers/iio/accel/mxc4005.c @@ -456,8 +456,6 @@ static int mxc4005_probe(struct i2c_client *client, data->dready_trig->ops = &mxc4005_trigger_ops; iio_trigger_set_drvdata(data->dready_trig, indio_dev); - indio_dev->trig = data->dready_trig; - iio_trigger_get(indio_dev->trig); ret = devm_iio_trigger_register(&client->dev, data->dready_trig); if (ret) { @@ -465,6 +463,8 @@ static int mxc4005_probe(struct i2c_client *client, "failed to register trigger\n"); return ret; } + + indio_dev->trig = iio_trigger_get(data->dready_trig); } return devm_iio_device_register(&client->dev, indio_dev); From 005cb02224a9c9b0ab487df86a18328e99b05f1e Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 15 Jun 2022 19:31:58 +0800 Subject: [PATCH 0100/1056] iio: accel: mma8452: ignore the return value of reset operation commit bf745142cc0a3e1723f9207fb0c073c88464b7b4 upstream. On fxls8471, after set the reset bit, the device will reset immediately, will not give ACK. So ignore the return value of this reset operation, let the following code logic to check whether the reset operation works. Signed-off-by: Haibo Chen Fixes: ecabae713196 ("iio: mma8452: Initialise before activating") Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/1655292718-14287-1-git-send-email-haibo.chen@nxp.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/mma8452.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 40faf09f5d875f..373b59557afe96 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -1493,10 +1493,14 @@ static int mma8452_reset(struct i2c_client *client) int i; int ret; - ret = i2c_smbus_write_byte_data(client, MMA8452_CTRL_REG2, + /* + * Find on fxls8471, after config reset bit, it reset immediately, + * and will not give ACK, so here do not check the return value. + * The following code will read the reset register, and check whether + * this reset works. + */ + i2c_smbus_write_byte_data(client, MMA8452_CTRL_REG2, MMA8452_CTRL_REG2_RST); - if (ret < 0) - return ret; for (i = 0; i < 10; i++) { usleep_range(100, 200); From f359c4751de1c2f1b07851fc95834331941cb8cb Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Tue, 10 May 2022 17:24:31 +0800 Subject: [PATCH 0101/1056] iio: gyro: mpu3050: Fix the error handling in mpu3050_power_up() commit b2f5ad97645e1deb5ca9bcb7090084b92cae35d2 upstream. The driver should disable regulators when fails at regmap_update_bits(). Signed-off-by: Zheyu Ma Reviewed-by: Linus Walleij Cc: Link: https://lore.kernel.org/r/20220510092431.1711284-1-zheyuma97@gmail.com Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/gyro/mpu3050-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c index 3225de1f023b35..5311bee5475ff1 100644 --- a/drivers/iio/gyro/mpu3050-core.c +++ b/drivers/iio/gyro/mpu3050-core.c @@ -876,6 +876,7 @@ static int mpu3050_power_up(struct mpu3050 *mpu3050) ret = regmap_update_bits(mpu3050->map, MPU3050_PWR_MGM, MPU3050_PWR_MGM_SLEEP, 0); if (ret) { + regulator_bulk_disable(ARRAY_SIZE(mpu3050->regs), mpu3050->regs); dev_err(mpu3050->dev, "error setting power mode\n"); return ret; } From 4687c3f955240ca2a576bdc3f742d4d915b6272d Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Thu, 19 May 2022 11:19:25 +0200 Subject: [PATCH 0102/1056] iio: trigger: sysfs: fix use-after-free on remove commit 78601726d4a59a291acc5a52da1d3a0a6831e4e8 upstream. Ensure that the irq_work has completed before the trigger is freed. ================================================================== BUG: KASAN: use-after-free in irq_work_run_list Read of size 8 at addr 0000000064702248 by task python3/25 Call Trace: irq_work_run_list irq_work_tick update_process_times tick_sched_handle tick_sched_timer __hrtimer_run_queues hrtimer_interrupt Allocated by task 25: kmem_cache_alloc_trace iio_sysfs_trig_add dev_attr_store sysfs_kf_write kernfs_fop_write_iter new_sync_write vfs_write ksys_write sys_write Freed by task 25: kfree iio_sysfs_trig_remove dev_attr_store sysfs_kf_write kernfs_fop_write_iter new_sync_write vfs_write ksys_write sys_write ================================================================== Fixes: f38bc926d022 ("staging:iio:sysfs-trigger: Use irq_work to properly active trigger") Signed-off-by: Vincent Whitchurch Reviewed-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20220519091925.1053897-1-vincent.whitchurch@axis.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/trigger/iio-trig-sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/trigger/iio-trig-sysfs.c b/drivers/iio/trigger/iio-trig-sysfs.c index e9adfff45b39b2..bec9b94e088b9c 100644 --- a/drivers/iio/trigger/iio-trig-sysfs.c +++ b/drivers/iio/trigger/iio-trig-sysfs.c @@ -195,6 +195,7 @@ static int iio_sysfs_trigger_remove(int id) } iio_trigger_unregister(t->trig); + irq_work_sync(&t->work); iio_trigger_free(t->trig); list_del(&t->l); From 2a2d448a74ab0c06410a3d7ba680026bd1cabef2 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 9 Jun 2022 11:52:34 +0200 Subject: [PATCH 0103/1056] iio: adc: stm32: fix maximum clock rate for stm32mp15x commit 990539486e7e311fb5dab1bf4d85d1a8973ae644 upstream. Change maximum STM32 ADC input clock rate to 36MHz, as specified in STM32MP15x datasheets. Fixes: d58c67d1d851 ("iio: adc: stm32-adc: add support for STM32MP1") Signed-off-by: Olivier Moysan Reviewed-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20220609095234.375925-1-olivier.moysan@foss.st.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/stm32-adc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index c088cb990193ca..e7451d458282ae 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -809,7 +809,7 @@ static const struct stm32_adc_priv_cfg stm32h7_adc_priv_cfg = { static const struct stm32_adc_priv_cfg stm32mp1_adc_priv_cfg = { .regs = &stm32h7_adc_common_regs, .clk_sel = stm32h7_adc_clk_sel, - .max_clk_rate_hz = 40000000, + .max_clk_rate_hz = 36000000, .has_syscfg = HAS_VBOOSTER | HAS_ANASWVDD, .num_irqs = 2, }; From 80e80577043fa4336126f0886de6cd39366765b3 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Thu, 9 Jun 2022 12:23:01 +0200 Subject: [PATCH 0104/1056] iio: imu: inv_icm42600: Fix broken icm42600 (chip id 0 value) commit 106b391e1b859100a3f38f0ad874236e9be06bde upstream. The 0 value used for INV_CHIP_ICM42600 was not working since the match in i2c/spi was checking against NULL value. To keep this check, add a first INV_CHIP_INVALID 0 value as safe guard. Fixes: 31c24c1e93c3 ("iio: imu: inv_icm42600: add core of new inv_icm42600 driver") Signed-off-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20220609102301.4794-1-jmaneyrol@invensense.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/inv_icm42600/inv_icm42600.h | 1 + drivers/iio/imu/inv_icm42600/inv_icm42600_core.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600.h b/drivers/iio/imu/inv_icm42600/inv_icm42600.h index c0f5059b13b316..995a9dc06521de 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600.h +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600.h @@ -17,6 +17,7 @@ #include "inv_icm42600_buffer.h" enum inv_icm42600_chip { + INV_CHIP_INVALID, INV_CHIP_ICM42600, INV_CHIP_ICM42602, INV_CHIP_ICM42605, diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c index 86858da9cc38f7..ca85fccc98393a 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c @@ -565,7 +565,7 @@ int inv_icm42600_core_probe(struct regmap *regmap, int chip, int irq, bool open_drain; int ret; - if (chip < 0 || chip >= INV_CHIP_NB) { + if (chip <= INV_CHIP_INVALID || chip >= INV_CHIP_NB) { dev_err(dev, "invalid chip = %d\n", chip); return -ENODEV; } From 148bab179f04583b271aaf584f333a1678f6cc09 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 24 May 2022 09:54:48 +0200 Subject: [PATCH 0105/1056] iio: afe: rescale: Fix boolean logic bug commit 9decacd8b3a432316d61c4366f302e63384cb08d upstream. When introducing support for processed channels I needed to invert the expression: if (!iio_channel_has_info(schan, IIO_CHAN_INFO_RAW) || !iio_channel_has_info(schan, IIO_CHAN_INFO_SCALE)) dev_err(dev, "source channel does not support raw/scale\n"); To the inverse, meaning detect when we can usse raw+scale rather than when we can not. This was the result: if (iio_channel_has_info(schan, IIO_CHAN_INFO_RAW) || iio_channel_has_info(schan, IIO_CHAN_INFO_SCALE)) dev_info(dev, "using raw+scale source channel\n"); Ooops. Spot the error. Yep old George Boole came up and bit me. That should be an &&. The current code "mostly works" because we have not run into systems supporting only raw but not scale or only scale but not raw, and I doubt there are few using the rescaler on anything such, but let's fix the logic. Cc: Liam Beguin Cc: stable@vger.kernel.org Fixes: 53ebee949980 ("iio: afe: iio-rescale: Support processed channels") Signed-off-by: Linus Walleij Reviewed-by: Liam Beguin Acked-by: Peter Rosin Link: https://lore.kernel.org/r/20220524075448.140238-1-linus.walleij@linaro.org Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/afe/iio-rescale.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/afe/iio-rescale.c b/drivers/iio/afe/iio-rescale.c index 271d73e420c427..cc28713b0dc8b8 100644 --- a/drivers/iio/afe/iio-rescale.c +++ b/drivers/iio/afe/iio-rescale.c @@ -148,7 +148,7 @@ static int rescale_configure_channel(struct device *dev, chan->ext_info = rescale->ext_info; chan->type = rescale->cfg->type; - if (iio_channel_has_info(schan, IIO_CHAN_INFO_RAW) || + if (iio_channel_has_info(schan, IIO_CHAN_INFO_RAW) && iio_channel_has_info(schan, IIO_CHAN_INFO_SCALE)) { dev_info(dev, "using raw+scale source channel\n"); } else if (iio_channel_has_info(schan, IIO_CHAN_INFO_PROCESSED)) { From d361b3cc1cf8abe81a7046cd6e4a583aad867dfe Mon Sep 17 00:00:00 2001 From: Yannick Brosseau Date: Mon, 16 May 2022 16:39:38 -0400 Subject: [PATCH 0106/1056] iio: adc: stm32: Fix ADCs iteration in irq handler commit d2214cca4d3eadc74eac9e30301ec7cad5355f00 upstream. The irq handler was only checking the mask for the first ADCs in the case of the F4 and H7 generation, since it was iterating up to the num_irq value. This patch add the maximum number of ADC in the common register, which map to the number of entries of eoc_msk and ovr_msk in stm32_adc_common_regs. This allow the handler to check all ADCs in that module. Tested on a STM32F429NIH6. Fixes: 695e2f5c289b ("iio: adc: stm32-adc: fix a regression when using dma and irq") Signed-off-by: Yannick Brosseau Reviewed-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20220516203939.3498673-2-yannick.brosseau@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/stm32-adc-core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index e7451d458282ae..42faca457ace81 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -64,6 +64,7 @@ struct stm32_adc_priv; * @max_clk_rate_hz: maximum analog clock rate (Hz, from datasheet) * @has_syscfg: SYSCFG capability flags * @num_irqs: number of interrupt lines + * @num_adcs: maximum number of ADC instances in the common registers */ struct stm32_adc_priv_cfg { const struct stm32_adc_common_regs *regs; @@ -71,6 +72,7 @@ struct stm32_adc_priv_cfg { u32 max_clk_rate_hz; unsigned int has_syscfg; unsigned int num_irqs; + unsigned int num_adcs; }; /** @@ -352,7 +354,7 @@ static void stm32_adc_irq_handler(struct irq_desc *desc) * before invoking the interrupt handler (e.g. call ISR only for * IRQ-enabled ADCs). */ - for (i = 0; i < priv->cfg->num_irqs; i++) { + for (i = 0; i < priv->cfg->num_adcs; i++) { if ((status & priv->cfg->regs->eoc_msk[i] && stm32_adc_eoc_enabled(priv, i)) || (status & priv->cfg->regs->ovr_msk[i])) @@ -796,6 +798,7 @@ static const struct stm32_adc_priv_cfg stm32f4_adc_priv_cfg = { .clk_sel = stm32f4_adc_clk_sel, .max_clk_rate_hz = 36000000, .num_irqs = 1, + .num_adcs = 3, }; static const struct stm32_adc_priv_cfg stm32h7_adc_priv_cfg = { @@ -804,6 +807,7 @@ static const struct stm32_adc_priv_cfg stm32h7_adc_priv_cfg = { .max_clk_rate_hz = 36000000, .has_syscfg = HAS_VBOOSTER, .num_irqs = 1, + .num_adcs = 2, }; static const struct stm32_adc_priv_cfg stm32mp1_adc_priv_cfg = { @@ -812,6 +816,7 @@ static const struct stm32_adc_priv_cfg stm32mp1_adc_priv_cfg = { .max_clk_rate_hz = 36000000, .has_syscfg = HAS_VBOOSTER | HAS_ANASWVDD, .num_irqs = 2, + .num_adcs = 2, }; static const struct of_device_id stm32_adc_of_match[] = { From 4f89730288ee7e1a5fba304f5a505ae221a40906 Mon Sep 17 00:00:00 2001 From: Yannick Brosseau Date: Mon, 16 May 2022 16:39:39 -0400 Subject: [PATCH 0107/1056] iio: adc: stm32: Fix IRQs on STM32F4 by removing custom spurious IRQs message commit 99bded02dae5e1e2312813506c41dc8db2fb656c upstream. The check for spurious IRQs introduced in 695e2f5c289bb assumed that the bits in the control and status registers are aligned. This is true for the H7 and MP1 version, but not the F4. The interrupt was then never handled on the F4. Instead of increasing the complexity of the comparison and check each bit specifically, we remove this check completely and rely on the generic handler for spurious IRQs. Fixes: 695e2f5c289b ("iio: adc: stm32-adc: fix a regression when using dma and irq") Signed-off-by: Yannick Brosseau Reviewed-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20220516203939.3498673-3-yannick.brosseau@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/stm32-adc.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index e3e75413b49e70..ef5b54ed966144 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -1259,7 +1259,6 @@ static irqreturn_t stm32_adc_threaded_isr(int irq, void *data) struct stm32_adc *adc = iio_priv(indio_dev); const struct stm32_adc_regspec *regs = adc->cfg->regs; u32 status = stm32_adc_readl(adc, regs->isr_eoc.reg); - u32 mask = stm32_adc_readl(adc, regs->ier_eoc.reg); /* Check ovr status right now, as ovr mask should be already disabled */ if (status & regs->isr_ovr.mask) { @@ -1274,11 +1273,6 @@ static irqreturn_t stm32_adc_threaded_isr(int irq, void *data) return IRQ_HANDLED; } - if (!(status & mask)) - dev_err_ratelimited(&indio_dev->dev, - "Unexpected IRQ: IER=0x%08x, ISR=0x%08x\n", - mask, status); - return IRQ_NONE; } @@ -1288,10 +1282,6 @@ static irqreturn_t stm32_adc_isr(int irq, void *data) struct stm32_adc *adc = iio_priv(indio_dev); const struct stm32_adc_regspec *regs = adc->cfg->regs; u32 status = stm32_adc_readl(adc, regs->isr_eoc.reg); - u32 mask = stm32_adc_readl(adc, regs->ier_eoc.reg); - - if (!(status & mask)) - return IRQ_WAKE_THREAD; if (status & regs->isr_ovr.mask) { /* From bb6f853289fe0cce52a8f9264d7b3928eefe62b5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 6 May 2022 11:50:40 +0200 Subject: [PATCH 0108/1056] iio: adc: axp288: Override TS pin bias current for some models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 048058399f19d43cf21de9f5d36cd8144337d004 upstream. Since commit 9bcf15f75cac ("iio: adc: axp288: Fix TS-pin handling") we preserve the bias current set by the firmware at boot. This fixes issues we were seeing on various models. Some models like the Nuvision Solo 10 Draw tablet actually need the old hardcoded 80ųA bias current for battery temperature monitoring to work properly. Add a quirk entry for the Nuvision Solo 10 Draw to the DMI quirk table to restore setting the bias current to 80ųA on this model. Fixes: 9bcf15f75cac ("iio: adc: axp288: Fix TS-pin handling") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215882 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220506095040.21008-1-hdegoede@redhat.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/axp288_adc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c index 5f5e8b39e4d227..84dbe9e2f0effb 100644 --- a/drivers/iio/adc/axp288_adc.c +++ b/drivers/iio/adc/axp288_adc.c @@ -196,6 +196,14 @@ static const struct dmi_system_id axp288_adc_ts_bias_override[] = { }, .driver_data = (void *)(uintptr_t)AXP288_ADC_TS_BIAS_80UA, }, + { + /* Nuvision Solo 10 Draw */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TMAX"), + DMI_MATCH(DMI_PRODUCT_NAME, "TM101W610L"), + }, + .driver_data = (void *)(uintptr_t)AXP288_ADC_TS_BIAS_80UA, + }, {} }; From 4358bf6b1aadb5c2ed0e3ee0ae4748c1273f3718 Mon Sep 17 00:00:00 2001 From: Jialin Zhang Date: Tue, 17 May 2022 11:35:26 +0800 Subject: [PATCH 0109/1056] iio: adc: rzg2l_adc: add missing fwnode_handle_put() in rzg2l_adc_parse_properties() commit d836715f588ea15f905f607c27bc693587058db4 upstream. fwnode_handle_put() should be used when terminating device_for_each_child_node() iteration with break or return to prevent stale device node references from being left behind. Fixes: d484c21bacfa ("iio: adc: Add driver for Renesas RZ/G2L A/D converter") Reported-by: Hulk Robot Signed-off-by: Jialin Zhang Reviewed-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20220517033526.2035735-1-zhangjialin11@huawei.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/rzg2l_adc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/rzg2l_adc.c b/drivers/iio/adc/rzg2l_adc.c index 32fbf57c362fa5..2fa41b90bcfa95 100644 --- a/drivers/iio/adc/rzg2l_adc.c +++ b/drivers/iio/adc/rzg2l_adc.c @@ -334,11 +334,15 @@ static int rzg2l_adc_parse_properties(struct platform_device *pdev, struct rzg2l i = 0; device_for_each_child_node(&pdev->dev, fwnode) { ret = fwnode_property_read_u32(fwnode, "reg", &channel); - if (ret) + if (ret) { + fwnode_handle_put(fwnode); return ret; + } - if (channel >= RZG2L_ADC_MAX_CHANNELS) + if (channel >= RZG2L_ADC_MAX_CHANNELS) { + fwnode_handle_put(fwnode); return -EINVAL; + } chan_array[i].type = IIO_VOLTAGE; chan_array[i].indexed = 1; From ab7bf025cee89db73c649216ddd2bc589c3d3862 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 24 May 2022 11:45:17 +0400 Subject: [PATCH 0110/1056] iio: adc: adi-axi-adc: Fix refcount leak in adi_axi_adc_attach_client commit ada7b0c0dedafd7d059115adf49e48acba3153a8 upstream. of_parse_phandle() returns a node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. Add missing of_node_put() to avoid refcount leak. Fixes: ef04070692a2 ("iio: adc: adi-axi-adc: add support for AXI ADC IP core") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220524074517.45268-1-linmq006@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/adi-axi-adc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c index a73e3c2d212fab..a9e655e69eaa24 100644 --- a/drivers/iio/adc/adi-axi-adc.c +++ b/drivers/iio/adc/adi-axi-adc.c @@ -322,16 +322,19 @@ static struct adi_axi_adc_client *adi_axi_adc_attach_client(struct device *dev) if (!try_module_get(cl->dev->driver->owner)) { mutex_unlock(®istered_clients_lock); + of_node_put(cln); return ERR_PTR(-ENODEV); } get_device(cl->dev); cl->info = info; mutex_unlock(®istered_clients_lock); + of_node_put(cln); return cl; } mutex_unlock(®istered_clients_lock); + of_node_put(cln); return ERR_PTR(-EPROBE_DEFER); } From 711591bf1dab25f5887317dcf31fac5b04e7512e Mon Sep 17 00:00:00 2001 From: Jialin Zhang Date: Tue, 17 May 2022 11:30:20 +0800 Subject: [PATCH 0111/1056] iio: adc: ti-ads131e08: add missing fwnode_handle_put() in ads131e08_alloc_channels() commit 47dcf770abc793f347a65a24c24d550c936f08b0 upstream. fwnode_handle_put() should be used when terminating device_for_each_child_node() iteration with break or return to prevent stale device node references from being left behind. Fixes: d935eddd2799 ("iio: adc: Add driver for Texas Instruments ADS131E0x ADC family") Reported-by: Hulk Robot Signed-off-by: Jialin Zhang Link: https://lore.kernel.org/r/20220517033020.2033324-1-zhangjialin11@huawei.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ti-ads131e08.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ti-ads131e08.c b/drivers/iio/adc/ti-ads131e08.c index 0c2025a2257505..80a09817c11942 100644 --- a/drivers/iio/adc/ti-ads131e08.c +++ b/drivers/iio/adc/ti-ads131e08.c @@ -739,7 +739,7 @@ static int ads131e08_alloc_channels(struct iio_dev *indio_dev) device_for_each_child_node(dev, node) { ret = fwnode_property_read_u32(node, "reg", &channel); if (ret) - return ret; + goto err_child_out; ret = fwnode_property_read_u32(node, "ti,gain", &tmp); if (ret) { @@ -747,7 +747,7 @@ static int ads131e08_alloc_channels(struct iio_dev *indio_dev) } else { ret = ads131e08_pga_gain_to_field_value(st, tmp); if (ret < 0) - return ret; + goto err_child_out; channel_config[i].pga_gain = tmp; } @@ -758,7 +758,7 @@ static int ads131e08_alloc_channels(struct iio_dev *indio_dev) } else { ret = ads131e08_validate_channel_mux(st, tmp); if (ret) - return ret; + goto err_child_out; channel_config[i].mux = tmp; } @@ -784,6 +784,10 @@ static int ads131e08_alloc_channels(struct iio_dev *indio_dev) st->channel_config = channel_config; return 0; + +err_child_out: + fwnode_handle_put(node); + return ret; } static void ads131e08_regulator_disable(void *data) From 0162451723178602c37f0555d235dfa17e486112 Mon Sep 17 00:00:00 2001 From: Liang He Date: Fri, 17 Jun 2022 19:53:23 +0800 Subject: [PATCH 0112/1056] xtensa: xtfpga: Fix refcount leak bug in setup commit 173940b3ae40114d4179c251a98ee039dc9cd5b3 upstream. In machine_setup(), of_find_compatible_node() will return a node pointer with refcount incremented. We should use of_node_put() when it is not used anymore. Cc: stable@vger.kernel.org Signed-off-by: Liang He Message-Id: <20220617115323.4046905-1-windhl@126.com> Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/platforms/xtfpga/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index 538e6748e85a7d..c79c1d09ea8631 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -133,6 +133,7 @@ static int __init machine_setup(void) if ((eth = of_find_compatible_node(eth, NULL, "opencores,ethoc"))) update_local_mac(eth); + of_node_put(eth); return 0; } arch_initcall(machine_setup); From 0dcc1dd8a5dd9240639f1051dfaa2dffc9fbbde5 Mon Sep 17 00:00:00 2001 From: Liang He Date: Fri, 17 Jun 2022 20:44:32 +0800 Subject: [PATCH 0113/1056] xtensa: Fix refcount leak bug in time.c commit a0117dc956429f2ede17b323046e1968d1849150 upstream. In calibrate_ccount(), of_find_compatible_node() will return a node pointer with refcount incremented. We should use of_node_put() when it is not used anymore. Cc: stable@vger.kernel.org Signed-off-by: Liang He Message-Id: <20220617124432.4049006-1-windhl@126.com> Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/time.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index e8ceb152860818..16b8a6273772cb 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -154,6 +154,7 @@ static void __init calibrate_ccount(void) cpu = of_find_compatible_node(NULL, NULL, "cdns,xtensa-cpu"); if (cpu) { clk = of_clk_get(cpu, 0); + of_node_put(cpu); if (!IS_ERR(clk)) { ccount_freq = clk_get_rate(clk); return; From cb4d52085c8b682aa65eed79bf76b76190fa5d73 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 7 Jun 2022 12:57:58 +0200 Subject: [PATCH 0114/1056] parisc/stifb: Fix fb_is_primary_device() only available with CONFIG_FB_STI commit 1d0811b03eb30b2f0793acaa96c6ce90b8b9c87a upstream. Fix this build error noticed by the kernel test robot: drivers/video/console/sticore.c:1132:5: error: redefinition of 'fb_is_primary_device' arch/parisc/include/asm/fb.h:18:19: note: previous definition of 'fb_is_primary_device' Signed-off-by: Helge Deller Reported-by: kernel test robot Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/fb.h | 2 +- drivers/video/console/sticore.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/fb.h b/arch/parisc/include/asm/fb.h index d63a2acb91f2b6..55d29c4f716e69 100644 --- a/arch/parisc/include/asm/fb.h +++ b/arch/parisc/include/asm/fb.h @@ -12,7 +12,7 @@ static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; } -#if defined(CONFIG_STI_CONSOLE) || defined(CONFIG_FB_STI) +#if defined(CONFIG_FB_STI) int fb_is_primary_device(struct fb_info *info); #else static inline int fb_is_primary_device(struct fb_info *info) diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c index 6a947ff96d6eb9..19fd3389946d95 100644 --- a/drivers/video/console/sticore.c +++ b/drivers/video/console/sticore.c @@ -1127,6 +1127,7 @@ int sti_call(const struct sti_struct *sti, unsigned long func, return ret; } +#if defined(CONFIG_FB_STI) /* check if given fb_info is the primary device */ int fb_is_primary_device(struct fb_info *info) { @@ -1142,6 +1143,7 @@ int fb_is_primary_device(struct fb_info *info) return (sti->info == info); } EXPORT_SYMBOL(fb_is_primary_device); +#endif MODULE_AUTHOR("Philipp Rumpf, Helge Deller, Thomas Bogendoerfer"); MODULE_DESCRIPTION("Core STI driver for HP's NGLE series graphics cards in HP PARISC machines"); From 6b28ca2cf3446cacc46a0b2b4c1d26dfd39e4562 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 26 Jun 2022 11:50:43 +0200 Subject: [PATCH 0115/1056] parisc: Enable ARCH_HAS_STRICT_MODULE_RWX commit 0a1355db36718178becd2bfe728a023933d73123 upstream. Fix a boot crash on a c8000 machine as reported by Dave. Basically it changes patch_map() to return an alias mapping to the to-be-patched code in order to prevent writing to write-protected memory. Signed-off-by: Helge Deller Suggested-by: John David Anglin Cc: stable@vger.kernel.org # v5.2+ Link: https://lore.kernel.org/all/e8ec39e8-25f8-e6b4-b7ed-4cb23efc756e@bell.net/ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 27a8b49af11fc9..5dccf01a9e172d 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -9,6 +9,7 @@ config PARISC select ARCH_WANT_FRAME_POINTERS select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_NO_SG_CHAIN select ARCH_SUPPORTS_HUGETLBFS if PA20 From 416d16b7dc0ba2fd0e585a82b18babb450b13aab Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 11 Jun 2022 17:10:13 +0200 Subject: [PATCH 0116/1056] powerpc/microwatt: wire up rng during setup_arch() commit 20a9689b3607456d92c6fb764501f6a95950b098 upstream. The platform's RNG must be available before random_init() in order to be useful for initial seeding, which in turn means that it needs to be called from setup_arch(), rather than from an init call. Fortunately, each platform already has a setup_arch function pointer, which means it's easy to wire this up. This commit also removes some noisy log messages that don't add much. Fixes: c25769fddaec ("powerpc/microwatt: Add support for hardware random number generator") Cc: stable@vger.kernel.org # v5.14+ Signed-off-by: Jason A. Donenfeld Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220611151015.548325-2-Jason@zx2c4.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/microwatt/microwatt.h | 7 +++++++ arch/powerpc/platforms/microwatt/rng.c | 10 +++------- arch/powerpc/platforms/microwatt/setup.c | 8 ++++++++ 3 files changed, 18 insertions(+), 7 deletions(-) create mode 100644 arch/powerpc/platforms/microwatt/microwatt.h diff --git a/arch/powerpc/platforms/microwatt/microwatt.h b/arch/powerpc/platforms/microwatt/microwatt.h new file mode 100644 index 00000000000000..335417e95e66f7 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/microwatt.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MICROWATT_H +#define _MICROWATT_H + +void microwatt_rng_init(void); + +#endif /* _MICROWATT_H */ diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c index 3d8ee6eb7dada6..8cb161533e6aaf 100644 --- a/arch/powerpc/platforms/microwatt/rng.c +++ b/arch/powerpc/platforms/microwatt/rng.c @@ -11,6 +11,7 @@ #include #include #include +#include "microwatt.h" #define DARN_ERR 0xFFFFFFFFFFFFFFFFul @@ -29,7 +30,7 @@ int microwatt_get_random_darn(unsigned long *v) return 1; } -static __init int rng_init(void) +void __init microwatt_rng_init(void) { unsigned long val; int i; @@ -37,12 +38,7 @@ static __init int rng_init(void) for (i = 0; i < 10; i++) { if (microwatt_get_random_darn(&val)) { ppc_md.get_random_seed = microwatt_get_random_darn; - return 0; + return; } } - - pr_warn("Unable to use DARN for get_random_seed()\n"); - - return -EIO; } -machine_subsys_initcall(, rng_init); diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c index 0b02603bdb7475..6b32539395a485 100644 --- a/arch/powerpc/platforms/microwatt/setup.c +++ b/arch/powerpc/platforms/microwatt/setup.c @@ -16,6 +16,8 @@ #include #include +#include "microwatt.h" + static void __init microwatt_init_IRQ(void) { xics_init(); @@ -32,10 +34,16 @@ static int __init microwatt_populate(void) } machine_arch_initcall(microwatt, microwatt_populate); +static void __init microwatt_setup_arch(void) +{ + microwatt_rng_init(); +} + define_machine(microwatt) { .name = "microwatt", .probe = microwatt_probe, .init_IRQ = microwatt_init_IRQ, + .setup_arch = microwatt_setup_arch, .progress = udbg_progress, .calibrate_decr = generic_calibrate_decr, }; From fe643b5afde63936c5ff97806f15d87cae87e8cc Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 9 Jun 2022 16:03:28 +0530 Subject: [PATCH 0117/1056] powerpc: Enable execve syscall exit tracepoint commit ec6d0dde71d760aa60316f8d1c9a1b0d99213529 upstream. On execve[at], we are zero'ing out most of the thread register state including gpr[0], which contains the syscall number. Due to this, we fail to trigger the syscall exit tracepoint properly. Fix this by retaining gpr[0] in the thread register state. Before this patch: # tail /sys/kernel/debug/tracing/trace cat-123 [000] ..... 61.449351: sys_execve(filename: 7fffa6b23448, argv: 7fffa6b233e0, envp: 7fffa6b233f8) cat-124 [000] ..... 62.428481: sys_execve(filename: 7fffa6b23448, argv: 7fffa6b233e0, envp: 7fffa6b233f8) echo-125 [000] ..... 65.813702: sys_execve(filename: 7fffa6b23378, argv: 7fffa6b233a0, envp: 7fffa6b233b0) echo-125 [000] ..... 65.822214: sys_execveat(fd: 0, filename: 1009ac48, argv: 7ffff65d0c98, envp: 7ffff65d0ca8, flags: 0) After this patch: # tail /sys/kernel/debug/tracing/trace cat-127 [000] ..... 100.416262: sys_execve(filename: 7fffa41b3448, argv: 7fffa41b33e0, envp: 7fffa41b33f8) cat-127 [000] ..... 100.418203: sys_execve -> 0x0 echo-128 [000] ..... 103.873968: sys_execve(filename: 7fffa41b3378, argv: 7fffa41b33a0, envp: 7fffa41b33b0) echo-128 [000] ..... 103.875102: sys_execve -> 0x0 echo-128 [000] ..... 103.882097: sys_execveat(fd: 0, filename: 1009ac48, argv: 7fffd10d2148, envp: 7fffd10d2158, flags: 0) echo-128 [000] ..... 103.883225: sys_execveat -> 0x0 Cc: stable@vger.kernel.org Signed-off-by: Naveen N. Rao Tested-by: Sumit Dubey2 Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220609103328.41306-1-naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 39a0a13a3a2770..c590e12199132f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1818,7 +1818,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) tm_reclaim_current(0); #endif - memset(regs->gpr, 0, sizeof(regs->gpr)); + memset(®s->gpr[1], 0, sizeof(regs->gpr) - sizeof(regs->gpr[0])); regs->ctr = 0; regs->link = 0; regs->xer = 0; From c1cfae46c5dc1d450defc5b31dd8d90ab4f46f8d Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Tue, 14 Jun 2022 23:49:52 +1000 Subject: [PATCH 0118/1056] powerpc/rtas: Allow ibm,platform-dump RTAS call with null buffer address commit 7bc08056a6dabc3a1442216daf527edf61ac24b6 upstream. Add a special case to block_rtas_call() to allow the ibm,platform-dump RTAS call through the RTAS filter if the buffer address is 0. According to PAPR, ibm,platform-dump is called with a null buffer address to notify the platform firmware that processing of a particular dump is finished. Without this, on a pseries machine with CONFIG_PPC_RTAS_FILTER enabled, an application such as rtas_errd that is attempting to retrieve a dump will encounter an error at the end of the retrieval process. Fixes: bd59380c5ba4 ("powerpc/rtas: Restrict RTAS requests from userspace") Cc: stable@vger.kernel.org Reported-by: Sathvika Vasireddy Signed-off-by: Andrew Donnellan Reviewed-by: Tyrel Datwyler Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220614134952.156010-1-ajd@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/rtas.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 3f58140370357d..e8f44084d25127 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -983,7 +983,7 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { { "get-time-of-day", -1, -1, -1, -1, -1 }, { "ibm,get-vpd", -1, 0, -1, 1, 2 }, { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, - { "ibm,platform-dump", -1, 4, 5, -1, -1 }, + { "ibm,platform-dump", -1, 4, 5, -1, -1 }, /* Special cased */ { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, @@ -1032,6 +1032,15 @@ static bool block_rtas_call(int token, int nargs, size = 1; end = base + size - 1; + + /* + * Special case for ibm,platform-dump - NULL buffer + * address is used to indicate end of dump processing + */ + if (!strcmp(f->name, "ibm,platform-dump") && + base == 0) + return false; + if (!in_rmo_buf(base, end)) goto err; } From 1ad385647bf3d89bcb58d39c8bde29be71e56aa7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 21 Jun 2022 16:08:49 +0200 Subject: [PATCH 0119/1056] powerpc/powernv: wire up rng during setup_arch commit f3eac426657d985b97c92fa5f7ae1d43f04721f3 upstream. The platform's RNG must be available before random_init() in order to be useful for initial seeding, which in turn means that it needs to be called from setup_arch(), rather than from an init call. Complicating things, however, is that POWER8 systems need some per-cpu state and kmalloc, which isn't available at this stage. So we split things up into an early phase and a later opportunistic phase. This commit also removes some noisy log messages that don't add much. Fixes: a4da0d50b2a0 ("powerpc: Implement arch_get_random_long/int() for powernv") Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Jason A. Donenfeld Reviewed-by: Christophe Leroy [mpe: Add of_node_put(), use pnv naming, minor change log editing] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220621140849.127227-1-Jason@zx2c4.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/powernv.h | 2 + arch/powerpc/platforms/powernv/rng.c | 52 ++++++++++++++++-------- arch/powerpc/platforms/powernv/setup.c | 2 + 3 files changed, 40 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 11df4e16a1cc3b..528946ee7a7771 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -42,4 +42,6 @@ ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count); u32 memcons_get_size(struct memcons *mc); struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name); +void pnv_rng_init(void); + #endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 69c344c8884f36..2b5a1a41234cc6 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -17,6 +17,7 @@ #include #include #include +#include "powernv.h" #define DARN_ERR 0xFFFFFFFFFFFFFFFFul @@ -28,7 +29,6 @@ struct powernv_rng { static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng); - int powernv_hwrng_present(void) { struct powernv_rng *rng; @@ -98,9 +98,6 @@ static int initialise_darn(void) return 0; } } - - pr_warn("Unable to use DARN for get_random_seed()\n"); - return -EIO; } @@ -163,32 +160,55 @@ static __init int rng_create(struct device_node *dn) rng_init_per_cpu(rng, dn); - pr_info_once("Registering arch random hook.\n"); - ppc_md.get_random_seed = powernv_get_random_long; return 0; } -static __init int rng_init(void) +static int __init pnv_get_random_long_early(unsigned long *v) { struct device_node *dn; - int rc; + + if (!slab_is_available()) + return 0; + + if (cmpxchg(&ppc_md.get_random_seed, pnv_get_random_long_early, + NULL) != pnv_get_random_long_early) + return 0; for_each_compatible_node(dn, NULL, "ibm,power-rng") { - rc = rng_create(dn); - if (rc) { - pr_err("Failed creating rng for %pOF (%d).\n", - dn, rc); + if (rng_create(dn)) continue; - } - /* Create devices for hwrng driver */ of_platform_device_create(dn, NULL, NULL); } - initialise_darn(); + if (!ppc_md.get_random_seed) + return 0; + return ppc_md.get_random_seed(v); +} + +void __init pnv_rng_init(void) +{ + struct device_node *dn; + /* Prefer darn over the rest. */ + if (!initialise_darn()) + return; + + dn = of_find_compatible_node(NULL, NULL, "ibm,power-rng"); + if (dn) + ppc_md.get_random_seed = pnv_get_random_long_early; + + of_node_put(dn); +} + +static int __init pnv_rng_late_init(void) +{ + unsigned long v; + /* In case it wasn't called during init for some other reason. */ + if (ppc_md.get_random_seed == pnv_get_random_long_early) + pnv_get_random_long_early(&v); return 0; } -machine_subsys_initcall(powernv, rng_init); +machine_subsys_initcall(powernv, pnv_rng_late_init); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index a8db3f15306394..1b3c7e04a7af52 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -190,6 +190,8 @@ static void __init pnv_setup_arch(void) pnv_check_guarded_cores(); /* XXX PMCS */ + + pnv_rng_init(); } static void __init pnv_init(void) From 93f7d2a7fcf33e77285db44b512eb5a8e4657fe7 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Tue, 17 May 2022 09:21:34 -0700 Subject: [PATCH 0120/1056] drm/msm/dp: Always clear mask bits to disable interrupts at dp_ctrl_reset_irq_ctrl() commit 993a2adc6e2e94a0a7b5bfc054eda90ac95f62c3 upstream. dp_catalog_ctrl_reset() will software reset DP controller. But it will not reset programmable registers to default value. DP driver still have to clear mask bits to interrupt status registers to disable interrupts after software reset of controller. At current implementation, dp_ctrl_reset_irq_ctrl() will software reset dp controller but did not call dp_catalog_ctrl_enable_irq(false) to clear hpd related interrupt mask bits to disable hpd related interrupts due to it mistakenly think hpd related interrupt mask bits will be cleared by software reset of dp controller automatically. This mistake may cause system to crash during suspending procedure due to unexpected irq fired and trigger event thread to access dp controller registers with controller clocks are disabled. This patch fixes system crash during suspending problem by removing "enable" flag condition checking at dp_ctrl_reset_irq_ctrl() so that hpd related interrupt mask bits are cleared to prevent unexpected from happening. Changes in v2: -- add more details commit text Changes in v3: -- add synchrons_irq() -- add atomic_t suspended Changes in v4: -- correct Fixes's commit ID -- remove synchrons_irq() Changes in v5: -- revise commit text Changes in v6: -- add event_lock to protect "suspended" Changes in v7: -- delete "suspended" flag Fixes: 989ebe7bc446 ("drm/msm/dp: do not initialize phy until plugin interrupt received") Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Patchwork: https://patchwork.freedesktop.org/patch/486591/ Link: https://lore.kernel.org/r/1652804494-19650-1-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/dp/dp_ctrl.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index afffdad0ebf923..b6f4ce2a48afea 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1356,8 +1356,13 @@ void dp_ctrl_reset_irq_ctrl(struct dp_ctrl *dp_ctrl, bool enable) dp_catalog_ctrl_reset(ctrl->catalog); - if (enable) - dp_catalog_ctrl_enable_irq(ctrl->catalog, enable); + /* + * all dp controller programmable registers will not + * be reset to default value after DP_SW_RESET + * therefore interrupt mask bits have to be updated + * to enable/disable interrupts + */ + dp_catalog_ctrl_enable_irq(ctrl->catalog, enable); } void dp_ctrl_phy_init(struct dp_ctrl *dp_ctrl) From c845b98be950768b7971e5a2d47f4ffca876e3f1 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 10 May 2022 07:46:12 +0200 Subject: [PATCH 0121/1056] ARM: dts: imx7: Move hsic_phy power domain to HSIC PHY node commit 552ca27929ab28b341ae9b2629f0de3a84c98ee8 upstream. Move the power domain to its actual user. This keeps the power domain enabled even when the USB host is runtime suspended. This is necessary to detect any downstream events, like device attach. Fixes: 02f8eb40ef7b ("ARM: dts: imx7s: Add power domain for imx7d HSIC") Suggested-by: Jun Li Signed-off-by: Alexander Stein Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx7s.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 1843fc0538709d..95f22513a7c024 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -104,6 +104,7 @@ compatible = "usb-nop-xceiv"; clocks = <&clks IMX7D_USB_HSIC_ROOT_CLK>; clock-names = "main_clk"; + power-domains = <&pgc_hsic_phy>; #phy-cells = <0>; }; @@ -1135,7 +1136,6 @@ compatible = "fsl,imx7d-usb", "fsl,imx27-usb"; reg = <0x30b30000 0x200>; interrupts = ; - power-domains = <&pgc_hsic_phy>; clocks = <&clks IMX7D_USB_CTRL_CLK>; fsl,usbphy = <&usbphynop3>; fsl,usbmisc = <&usbmisc3 0>; From 4b5047643466c3ef7ea71cc8c0f5d2dc2631d7ce Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 11 May 2022 18:08:23 +0200 Subject: [PATCH 0122/1056] ARM: dts: imx6qdl: correct PU regulator ramp delay commit 93a8ba2a619816d631bd69e9ce2172b4d7a481b8 upstream. Contrary to what was believed at the time, the ramp delay of 150us is not plenty for the PU LDO with the default step time of 512 pulses of the 24MHz clock. Measurements have shown that after enabling the LDO the voltage on VDDPU_CAP jumps to ~750mV in the first step and after that the regulator executes the normal ramp up as defined by the step size control. This means it takes the regulator between 360us and 370us to ramp up to the nominal 1.15V voltage for this power domain. With the old setting of the ramp delay the power up of the PU GPC domain would happen in the middle of the regulator ramp with the voltage being at around 900mV. Apparently this was enough for most units to properly power up the peripherals in the domain and execute the reset. Some units however, fail to power up properly, especially when the chip is at a low temperature. In that case any access to the GPU registers would yield an incorrect result with no way to recover from this situation. Change the ramp delay to 380us to cover the measured ramp up time with a bit of additional slack. Fixes: 40130d327f72 ("ARM: dts: imx6qdl: Allow disabling the PU regulator, add a enable ramp delay") Signed-off-by: Lucas Stach Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx6qdl.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index 89c342f3a7c2f7..8520ffc1779b66 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -763,7 +763,7 @@ regulator-name = "vddpu"; regulator-min-microvolt = <725000>; regulator-max-microvolt = <1450000>; - regulator-enable-ramp-delay = <150>; + regulator-enable-ramp-delay = <380>; anatop-reg-offset = <0x140>; anatop-vol-bit-shift = <9>; anatop-vol-bit-width = <5>; From 5e00d3d4023c1d60e53d9200a66fe934b07ce290 Mon Sep 17 00:00:00 2001 From: Aswath Govindraju Date: Thu, 12 May 2022 12:18:58 +0530 Subject: [PATCH 0123/1056] arm64: dts: ti: k3-am64-main: Remove support for HS400 speed mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0c0af88f3f318e73237f7fadd02d0bf2b6c996bb upstream. AM64 SoC, does not support HS400 and HS200 is the maximum supported speed mode[1]. Therefore, fix the device tree node to reflect the same. [1] - https://www.ti.com/lit/ds/symlink/am6442.pdf (SPRSP56C – JANUARY 2021 – REVISED FEBRUARY 2022) Fixes: 8abae9389bdb ("arm64: dts: ti: Add support for AM642 SoC") Signed-off-by: Aswath Govindraju Signed-off-by: Nishanth Menon Link: https://lore.kernel.org/r/20220512064859.32059-1-a-govindraju@ti.com Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/ti/k3-am64-main.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi index 86291f3469f150..d195b97ab2eef9 100644 --- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi @@ -456,13 +456,11 @@ clock-names = "clk_ahb", "clk_xin"; mmc-ddr-1_8v; mmc-hs200-1_8v; - mmc-hs400-1_8v; ti,trm-icp = <0x2>; ti,otap-del-sel-legacy = <0x0>; ti,otap-del-sel-mmc-hs = <0x0>; ti,otap-del-sel-ddr52 = <0x6>; ti,otap-del-sel-hs200 = <0x7>; - ti,otap-del-sel-hs400 = <0x4>; }; sdhci1: mmc@fa00000 { From d23f76018e17618559da9eea179d137362023f95 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 23 May 2022 18:55:13 +0400 Subject: [PATCH 0124/1056] ARM: exynos: Fix refcount leak in exynos_map_pmu commit c4c79525042a4a7df96b73477feaf232fe44ae81 upstream. of_find_matching_node() returns a node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. Add missing of_node_put() to avoid refcount leak. of_node_put() checks null pointer. Fixes: fce9e5bb2526 ("ARM: EXYNOS: Add support for mapping PMU base address via DT") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220523145513.12341-1-linmq006@gmail.com Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-exynos/exynos.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index 8b48326be9fd57..51a247ca4da8c8 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -149,6 +149,7 @@ static void exynos_map_pmu(void) np = of_find_matching_node(NULL, exynos_dt_pmu_match); if (np) pmu_base_addr = of_iomap(np, 0); + of_node_put(np); } static void __init exynos_init_irq(void) From 10ba9d499a9fd82ed40897e734ba19870a879407 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 26 May 2022 11:53:22 +0400 Subject: [PATCH 0125/1056] soc: bcm: brcmstb: pm: pm-arm: Fix refcount leak in brcmstb_pm_probe commit 37d838de369b07b596c19ff3662bf0293fdb09ee upstream. of_find_matching_node() returns a node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. Add missing of_node_put() to avoid refcount leak. In brcmstb_init_sram, it pass dn to of_address_to_resource(), of_address_to_resource() will call of_find_device_by_node() to take reference, so we should release the reference returned by of_find_matching_node(). Fixes: 0b741b8234c8 ("soc: bcm: brcmstb: Add support for S2/S3/S5 suspend states (ARM)") Signed-off-by: Miaoqian Lin Reviewed-by: Andy Shevchenko Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- drivers/soc/bcm/brcmstb/pm/pm-arm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/bcm/brcmstb/pm/pm-arm.c b/drivers/soc/bcm/brcmstb/pm/pm-arm.c index 3cbb165d6e3090..70ad0f3dce283a 100644 --- a/drivers/soc/bcm/brcmstb/pm/pm-arm.c +++ b/drivers/soc/bcm/brcmstb/pm/pm-arm.c @@ -783,6 +783,7 @@ static int brcmstb_pm_probe(struct platform_device *pdev) } ret = brcmstb_init_sram(dn); + of_node_put(dn); if (ret) { pr_err("error setting up SRAM for PM\n"); return ret; From 4d9c60e868f7cf8e09956e7d5bb44d807d712699 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 1 Jun 2022 13:05:48 +0400 Subject: [PATCH 0126/1056] ARM: Fix refcount leak in axxia_boot_secondary commit 7c7ff68daa93d8c4cdea482da4f2429c0398fcde upstream. of_find_compatible_node() returns a node pointer with refcount incremented, we should use of_node_put() on it when done. Add missing of_node_put() to avoid refcount leak. Fixes: 1d22924e1c4e ("ARM: Add platform support for LSI AXM55xx SoC") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220601090548.47616-1-linmq006@gmail.com' Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-axxia/platsmp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-axxia/platsmp.c b/arch/arm/mach-axxia/platsmp.c index 512943eae30a55..2e203626eda529 100644 --- a/arch/arm/mach-axxia/platsmp.c +++ b/arch/arm/mach-axxia/platsmp.c @@ -39,6 +39,7 @@ static int axxia_boot_secondary(unsigned int cpu, struct task_struct *idle) return -ENOENT; syscon = of_iomap(syscon_np, 0); + of_node_put(syscon_np); if (!syscon) return -ENOMEM; From cde4480b5ab06195b9164184b0c02ced71e601b4 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 2 Jun 2022 08:17:21 +0400 Subject: [PATCH 0127/1056] memory: samsung: exynos5422-dmc: Fix refcount leak in of_get_dram_timings commit 1332661e09304b7b8e84e5edc11811ba08d12abe upstream. of_parse_phandle() returns a node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. This function doesn't call of_node_put() in some error paths. To unify the structure, Add put_node label and goto it on errors. Fixes: 6e7674c3c6df ("memory: Add DMC driver for Exynos5422") Signed-off-by: Miaoqian Lin Reviewed-by: Lukasz Luba Link: https://lore.kernel.org/r/20220602041721.64348-1-linmq006@gmail.com Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- drivers/memory/samsung/exynos5422-dmc.c | 29 +++++++++++++++---------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/memory/samsung/exynos5422-dmc.c b/drivers/memory/samsung/exynos5422-dmc.c index 4733e7898ffe53..c491cd549644fa 100644 --- a/drivers/memory/samsung/exynos5422-dmc.c +++ b/drivers/memory/samsung/exynos5422-dmc.c @@ -1187,33 +1187,39 @@ static int of_get_dram_timings(struct exynos5_dmc *dmc) dmc->timing_row = devm_kmalloc_array(dmc->dev, TIMING_COUNT, sizeof(u32), GFP_KERNEL); - if (!dmc->timing_row) - return -ENOMEM; + if (!dmc->timing_row) { + ret = -ENOMEM; + goto put_node; + } dmc->timing_data = devm_kmalloc_array(dmc->dev, TIMING_COUNT, sizeof(u32), GFP_KERNEL); - if (!dmc->timing_data) - return -ENOMEM; + if (!dmc->timing_data) { + ret = -ENOMEM; + goto put_node; + } dmc->timing_power = devm_kmalloc_array(dmc->dev, TIMING_COUNT, sizeof(u32), GFP_KERNEL); - if (!dmc->timing_power) - return -ENOMEM; + if (!dmc->timing_power) { + ret = -ENOMEM; + goto put_node; + } dmc->timings = of_lpddr3_get_ddr_timings(np_ddr, dmc->dev, DDR_TYPE_LPDDR3, &dmc->timings_arr_size); if (!dmc->timings) { - of_node_put(np_ddr); dev_warn(dmc->dev, "could not get timings from DT\n"); - return -EINVAL; + ret = -EINVAL; + goto put_node; } dmc->min_tck = of_lpddr3_get_min_tck(np_ddr, dmc->dev); if (!dmc->min_tck) { - of_node_put(np_ddr); dev_warn(dmc->dev, "could not get tck from DT\n"); - return -EINVAL; + ret = -EINVAL; + goto put_node; } /* Sorted array of OPPs with frequency ascending */ @@ -1227,13 +1233,14 @@ static int of_get_dram_timings(struct exynos5_dmc *dmc) clk_period_ps); } - of_node_put(np_ddr); /* Take the highest frequency's timings as 'bypass' */ dmc->bypass_timing_row = dmc->timing_row[idx - 1]; dmc->bypass_timing_data = dmc->timing_data[idx - 1]; dmc->bypass_timing_power = dmc->timing_power[idx - 1]; +put_node: + of_node_put(np_ddr); return ret; } From da3ee7cd2f15922ad88a7ca6deee2eafdc7cd214 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sun, 5 Jun 2022 11:58:41 +0400 Subject: [PATCH 0128/1056] ARM: cns3xxx: Fix refcount leak in cns3xxx_init commit 1ba904b6b16e08de5aed7c1349838d9cd0d178c5 upstream. of_find_compatible_node() returns a node pointer with refcount incremented, we should use of_node_put() on it when done. Add missing of_node_put() to avoid refcount leak. Fixes: 415f59142d9d ("ARM: cns3xxx: initial DT support") Signed-off-by: Miaoqian Lin Acked-by: Krzysztof Halasa Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-cns3xxx/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c index e4f4b20b83a2d9..3fc4ec830e3a33 100644 --- a/arch/arm/mach-cns3xxx/core.c +++ b/arch/arm/mach-cns3xxx/core.c @@ -372,6 +372,7 @@ static void __init cns3xxx_init(void) /* De-Asscer SATA Reset */ cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SATA)); } + of_node_put(dn); dn = of_find_compatible_node(NULL, NULL, "cavium,cns3420-sdhci"); if (of_device_is_available(dn)) { @@ -385,6 +386,7 @@ static void __init cns3xxx_init(void) cns3xxx_pwr_clk_en(CNS3XXX_PWR_CLK_EN(SDIO)); cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SDIO)); } + of_node_put(dn); pm_power_off = cns3xxx_power_off; From bcf2087ce4de18a55c23b1985b7ba8440224b775 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 11 Jun 2022 03:32:30 +0900 Subject: [PATCH 0129/1056] modpost: fix section mismatch check for exported init/exit sections commit 28438794aba47a27e922857d27b31b74e8559143 upstream. Since commit f02e8a6596b7 ("module: Sort exported symbols"), EXPORT_SYMBOL* is placed in the individual section ___ksymtab(_gpl)+ (3 leading underscores instead of 2). Since then, modpost cannot detect the bad combination of EXPORT_SYMBOL and __init/__exit. Fix the .fromsec field. Fixes: f02e8a6596b7 ("module: Sort exported symbols") Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- scripts/mod/modpost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 94041ee32798fd..b284ee01fdebb9 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1108,7 +1108,7 @@ static const struct sectioncheck sectioncheck[] = { }, /* Do not export init/exit functions or data */ { - .fromsec = { "__ksymtab*", NULL }, + .fromsec = { "___ksymtab*", NULL }, .bad_tosec = { INIT_SECTIONS, EXIT_SECTIONS, NULL }, .mismatch = EXPORT_TO_INIT_EXIT, .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL }, From 7a3a4683562ee54be46697bc9bbc55e1f14c051e Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 15 Jun 2022 23:05:34 +0200 Subject: [PATCH 0130/1056] ARM: dts: bcm2711-rpi-400: Fix GPIO line names commit b9b6d4c925604b70d007feb4c77b8cc4c038d2da upstream. The GPIO expander line names has been fixed in the vendor tree last year, so upstream these changes. Fixes: 1c701accecf2 ("ARM: dts: Add Raspberry Pi 400 support") Reported-by: Ivan T. Ivanov Signed-off-by: Stefan Wahren Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/bcm2711-rpi-400.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/bcm2711-rpi-400.dts b/arch/arm/boot/dts/bcm2711-rpi-400.dts index f4d2fc20397c70..c53d9eb0b80270 100644 --- a/arch/arm/boot/dts/bcm2711-rpi-400.dts +++ b/arch/arm/boot/dts/bcm2711-rpi-400.dts @@ -28,12 +28,12 @@ &expgpio { gpio-line-names = "BT_ON", "WL_ON", - "", + "PWR_LED_OFF", "GLOBAL_RESET", "VDD_SD_IO_SEL", - "CAM_GPIO", + "GLOBAL_SHUTDOWN", "SD_PWR_ON", - "SD_OC_N"; + "SHUTDOWN_REQUEST"; }; &genet_mdio { From 46a78d1413350571eeffc5a8b2ff1eceeb9e305b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 20 Jun 2022 11:03:48 +0200 Subject: [PATCH 0131/1056] random: update comment from copy_to_user() -> copy_to_iter() commit 63b8ea5e4f1a87dea4d3114293fc8e96a8f193d7 upstream. This comment wasn't updated when we moved from read() to read_iter(), so this patch makes the trivial fix. Fixes: 1b388e7765f2 ("random: convert to using fops->read_iter()") Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index d6f2c62020b5de..7bd6eb15d432ee 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -452,7 +452,7 @@ static ssize_t get_random_bytes_user(struct iov_iter *iter) /* * Immediately overwrite the ChaCha key at index 4 with random - * bytes, in case userspace causes copy_to_user() below to sleep + * bytes, in case userspace causes copy_to_iter() below to sleep * forever, so that we still retain forward secrecy in that case. */ crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE); From a8bbb4c26460cf5b6cda080c58c49faf10e9bcda Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Jun 2022 15:51:44 +0300 Subject: [PATCH 0132/1056] perf build-id: Fix caching files with a wrong build ID commit ab66fdace8581ef3b4e7cf5381a168ed4058d779 upstream. Build ID events associate a file name with a build ID. However, when using perf inject, there is no guarantee that the file on the current machine at the current time has that build ID. Fix by comparing the build IDs and skip adding to the cache if they are different. Example: $ echo "int main() {return 0;}" > prog.c $ gcc -o prog prog.c $ perf record --buildid-all ./prog [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.019 MB perf.data ] $ file-buildid() { file $1 | awk -F= '{print $2}' | awk -F, '{print $1}' ; } $ file-buildid prog 444ad9be165d8058a48ce2ffb4e9f55854a3293e $ file-buildid ~/.debug/$(pwd)/prog/444ad9be165d8058a48ce2ffb4e9f55854a3293e/elf 444ad9be165d8058a48ce2ffb4e9f55854a3293e $ echo "int main() {return 1;}" > prog.c $ gcc -o prog prog.c $ file-buildid prog 885524d5aaa24008a3e2b06caa3ea95d013c0fc5 Before: $ perf buildid-cache --purge $(pwd)/prog $ perf inject -i perf.data -o junk $ file-buildid ~/.debug/$(pwd)/prog/444ad9be165d8058a48ce2ffb4e9f55854a3293e/elf 885524d5aaa24008a3e2b06caa3ea95d013c0fc5 $ After: $ perf buildid-cache --purge $(pwd)/prog $ perf inject -i perf.data -o junk $ file-buildid ~/.debug/$(pwd)/prog/444ad9be165d8058a48ce2ffb4e9f55854a3293e/elf $ Fixes: 454c407ec17a0c63 ("perf: add perf-inject builtin") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tom Zanussi Link: https://lore.kernel.org/r/20220621125144.5623-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/build-id.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e32e8f2ff3bd7e..1d7c53873dd2d2 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -872,6 +872,30 @@ int build_id_cache__remove_s(const char *sbuild_id) return err; } +static int filename__read_build_id_ns(const char *filename, + struct build_id *bid, + struct nsinfo *nsi) +{ + struct nscookie nsc; + int ret; + + nsinfo__mountns_enter(nsi, &nsc); + ret = filename__read_build_id(filename, bid); + nsinfo__mountns_exit(&nsc); + + return ret; +} + +static bool dso__build_id_mismatch(struct dso *dso, const char *name) +{ + struct build_id bid; + + if (filename__read_build_id_ns(name, &bid, dso->nsinfo) < 0) + return false; + + return !dso__build_id_equal(dso, &bid); +} + static int dso__cache_build_id(struct dso *dso, struct machine *machine, void *priv __maybe_unused) { @@ -886,6 +910,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine, is_kallsyms = true; name = machine->mmap_name; } + + if (!is_kallsyms && dso__build_id_mismatch(dso, name)) + return 0; + return build_id_cache__add_b(&dso->bid, name, dso->nsinfo, is_kallsyms, is_vdso); } From cced9ce619ef483616b9e5ef4da6287ba2292a29 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 22 Jun 2022 12:14:24 -0700 Subject: [PATCH 0133/1056] dma-direct: use the correct size for dma_set_encrypted() commit 3be4562584bba603f33863a00c1c32eecf772ee6 upstream. The third parameter of dma_set_encrypted() is a size in bytes rather than the number of pages. Fixes: 4d0564785bb0 ("dma-direct: factor out dma_set_{de,en}crypted helpers") Signed-off-by: Dexuan Cui Reviewed-by: Robin Murphy Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- kernel/dma/direct.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 854d6df969de29..ed5dd9e023241a 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -323,7 +323,7 @@ void dma_direct_free(struct device *dev, size_t size, } else { if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) arch_dma_clear_uncached(cpu_addr, size); - if (dma_set_encrypted(dev, cpu_addr, 1 << page_order)) + if (dma_set_encrypted(dev, cpu_addr, size)) return; } @@ -360,7 +360,6 @@ void dma_direct_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_addr, enum dma_data_direction dir) { - unsigned int page_order = get_order(size); void *vaddr = page_address(page); /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ @@ -368,7 +367,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, dma_free_from_pool(dev, vaddr, size)) return; - if (dma_set_encrypted(dev, vaddr, 1 << page_order)) + if (dma_set_encrypted(dev, vaddr, size)) return; __dma_direct_free_pages(dev, page, size); } From 17aa69b458fde6c4a9cf5e2ff73d5b1a9346651b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 24 Jun 2022 04:11:47 +0900 Subject: [PATCH 0134/1056] kbuild: link vmlinux only once for CONFIG_TRIM_UNUSED_KSYMS (2nd attempt) commit 53632ba87d9f302a8d97a11ec2f4f4eec7bb75ea upstream. If CONFIG_TRIM_UNUSED_KSYMS is enabled and the kernel is built from a pristine state, the vmlinux is linked twice. Commit 3fdc7d3fe4c0 ("kbuild: link vmlinux only once for CONFIG_TRIM_UNUSED_KSYMS") explains why this happens, but it did not fix the issue at all. Now I realized I had applied a wrong patch. In v1 patch [1], the autoksyms_recursive target correctly recurses to "$(MAKE) -f $(srctree)/Makefile autoksyms_recursive". In v2 patch [2], I accidentally dropped the diff line, and it recurses to "$(MAKE) -f $(srctree)/Makefile vmlinux". Restore the code I intended in v1. [1]: https://lore.kernel.org/linux-kbuild/1521045861-22418-8-git-send-email-yamada.masahiro@socionext.com/ [2]: https://lore.kernel.org/linux-kbuild/1521166725-24157-8-git-send-email-yamada.masahiro@socionext.com/ Fixes: 3fdc7d3fe4c0 ("kbuild: link vmlinux only once for CONFIG_TRIM_UNUSED_KSYMS") Signed-off-by: Masahiro Yamada Tested-by: Sami Tolvanen Reviewed-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 03b3a493fcca12..db79a3f4734aa6 100644 --- a/Makefile +++ b/Makefile @@ -1163,7 +1163,7 @@ KBUILD_MODULES := 1 autoksyms_recursive: descend modules.order $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \ - "$(MAKE) -f $(srctree)/Makefile vmlinux" + "$(MAKE) -f $(srctree)/Makefile autoksyms_recursive" endif autoksyms_h := $(if $(CONFIG_TRIM_UNUSED_KSYMS), include/generated/autoksyms.h) From 7fc188a9a9ccdb70ccec5b2644aad0db97677611 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 11 Jun 2022 17:10:15 +0200 Subject: [PATCH 0135/1056] powerpc/pseries: wire up rng during setup_arch() commit e561e472a3d441753bd012333b057f48fef1045b upstream. The platform's RNG must be available before random_init() in order to be useful for initial seeding, which in turn means that it needs to be called from setup_arch(), rather than from an init call. Fortunately, each platform already has a setup_arch function pointer, which means it's easy to wire this up. This commit also removes some noisy log messages that don't add much. Fixes: a489043f4626 ("powerpc/pseries: Implement arch_get_random_long() based on H_RANDOM") Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Jason A. Donenfeld Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220611151015.548325-4-Jason@zx2c4.com Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/pseries.h | 2 ++ arch/powerpc/platforms/pseries/rng.c | 11 +++-------- arch/powerpc/platforms/pseries/setup.c | 2 ++ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 3544778e06d01a..2a97cc20fe8fea 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -115,4 +115,6 @@ extern u32 pseries_security_flavor; void pseries_setup_security_mitigations(void); void pseries_lpar_read_hblkrm_characteristics(void); +void pseries_rng_init(void); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c index 6268545947b838..6ddfdeaace9ef6 100644 --- a/arch/powerpc/platforms/pseries/rng.c +++ b/arch/powerpc/platforms/pseries/rng.c @@ -10,6 +10,7 @@ #include #include #include +#include "pseries.h" static int pseries_get_random_long(unsigned long *v) @@ -24,19 +25,13 @@ static int pseries_get_random_long(unsigned long *v) return 0; } -static __init int rng_init(void) +void __init pseries_rng_init(void) { struct device_node *dn; dn = of_find_compatible_node(NULL, NULL, "ibm,random"); if (!dn) - return -ENODEV; - - pr_info("Registering arch random hook.\n"); - + return; ppc_md.get_random_seed = pseries_get_random_long; - of_node_put(dn); - return 0; } -machine_subsys_initcall(pseries, rng_init); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index f79126f16258a5..c2b3752684b5fd 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -840,6 +840,8 @@ static void __init pSeries_setup_arch(void) if (swiotlb_force == SWIOTLB_FORCE) ppc_swiotlb_enable = 1; + + pseries_rng_init(); } static void pseries_panic(char *str) From 37238449af786e1be06f193ab54a60a39a776826 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 29 Jun 2022 09:03:32 +0200 Subject: [PATCH 0136/1056] Linux 5.15.51 Link: https://lore.kernel.org/r/20220627111938.151743692@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Florian Fainelli Tested-by: Shuah Khan Tested-by: Guenter Roeck Tested-by: Bagas Sanjaya Tested-by: Ron Economos Tested-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index db79a3f4734aa6..b3bc9d907bed39 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 50 +SUBLEVEL = 51 EXTRAVERSION = NAME = Trick or Treat From f4a80ec8c51d68be4b7a7830c510f75080c5e417 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Jun 2022 12:22:09 +0900 Subject: [PATCH 0137/1056] tick/nohz: unexport __init-annotated tick_nohz_full_setup() commit 2390095113e98fc52fffe35c5206d30d9efe3f78 upstream. EXPORT_SYMBOL and __init is a bad combination because the .init.text section is freed up after the initialization. Hence, modules cannot use symbols annotated __init. The access to a freed symbol may end up with kernel panic. modpost used to detect it, but it had been broken for a decade. Commit 28438794aba4 ("modpost: fix section mismatch check for exported init/exit sections") fixed it so modpost started to warn it again, then this showed up: MODPOST vmlinux.symvers WARNING: modpost: vmlinux.o(___ksymtab_gpl+tick_nohz_full_setup+0x0): Section mismatch in reference from the variable __ksymtab_tick_nohz_full_setup to the function .init.text:tick_nohz_full_setup() The symbol tick_nohz_full_setup is exported and annotated __init Fix this by removing the __init annotation of tick_nohz_full_setup or drop the export. Drop the export because tick_nohz_full_setup() is only called from the built-in code in kernel/sched/isolation.c. Fixes: ae9e557b5be2 ("time: Export tick start/stop functions for rcutorture") Reported-by: Linus Torvalds Signed-off-by: Masahiro Yamada Tested-by: Paul E. McKenney Signed-off-by: Linus Torvalds Cc: Thomas Backlund Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-sched.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 19e6b861de97be..9c6f661fb4362b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -509,7 +509,6 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask) cpumask_copy(tick_nohz_full_mask, cpumask); tick_nohz_full_running = true; } -EXPORT_SYMBOL_GPL(tick_nohz_full_setup); static int tick_nohz_cpu_down(unsigned int cpu) { From edbaf6e5e93acda96aae23ba134ef3c1466da3b5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 30 Jun 2022 12:19:47 +0200 Subject: [PATCH 0138/1056] x86, kvm: use proper ASM macros for kvm_vcpu_is_preempted The build rightfully complains about: arch/x86/kernel/kvm.o: warning: objtool: __raw_callee_save___kvm_vcpu_is_preempted()+0x12: missing int3 after ret because the ASM_RET call is not being used correctly in kvm_vcpu_is_preempted(). This was hand-fixed-up in the kvm merge commit a4cfff3f0f8c ("Merge branch 'kvm-older-features' into HEAD") which of course can not be backported to stable kernels, so just fix this up directly instead. Cc: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 9e3af56747e8f0..eba6485a59a392 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -948,7 +948,7 @@ asm( "movq __per_cpu_offset(,%rdi,8), %rax;" "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);" "setne %al;" -"ret;" +ASM_RET ".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;" ".popsection"); From 1cdcd496b7ca3821df8b8551f1775b53efc934e0 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Fri, 27 May 2022 23:28:16 +0800 Subject: [PATCH 0139/1056] bcache: memset on stack variables in bch_btree_check() and bch_sectors_dirty_init() commit 7d6b902ea0e02b2a25c480edf471cbaa4ebe6b3c upstream. The local variables check_state (in bch_btree_check()) and state (in bch_sectors_dirty_init()) should be fully filled by 0, because before allocating them on stack, they were dynamically allocated by kzalloc(). Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20220527152818.27545-2-colyli@suse.de Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 1 + drivers/md/bcache/writeback.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 7b6f8bfef92708..98daa9d200f79a 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -2017,6 +2017,7 @@ int bch_btree_check(struct cache_set *c) if (c->root->level == 0) return 0; + memset(&check_state, 0, sizeof(struct btree_check_state)); check_state.c = c; check_state.total_threads = bch_btree_chkthread_nr(); check_state.key_idx = 0; diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 9699ce076b7756..96a07839864b6c 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -947,6 +947,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) return; } + memset(&state, 0, sizeof(struct bch_dirty_init_state)); state.c = c; state.d = d; state.total_threads = bch_btre_dirty_init_thread_nr(); From 71a218ca4fde7255fde96f5dc84e420e496dee17 Mon Sep 17 00:00:00 2001 From: Rustam Kovhaev Date: Tue, 28 Jun 2022 11:39:45 -0700 Subject: [PATCH 0140/1056] xfs: use kmem_cache_free() for kmem_cache objects [ Upstream commit c30a0cbd07ecc0eec7b3cd568f7b1c7bb7913f93 ] For kmalloc() allocations SLOB prepends the blocks with a 4-byte header, and it puts the size of the allocated blocks in that header. Blocks allocated with kmem_cache_alloc() allocations do not have that header. SLOB explodes when you allocate memory with kmem_cache_alloc() and then try to free it with kfree() instead of kmem_cache_free(). SLOB will assume that there is a header when there is none, read some garbage to size variable and corrupt the adjacent objects, which eventually leads to hang or panic. Let's make XFS work with SLOB by using proper free function. Fixes: 9749fee83f38 ("xfs: enable the xfs_defer mechanism to process extents to free") Signed-off-by: Rustam Kovhaev Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_extfree_item.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 3f8a0713573ad5..a4b8caa2c601d9 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -482,7 +482,7 @@ xfs_extent_free_finish_item( free->xefi_startblock, free->xefi_blockcount, &free->xefi_oinfo, free->xefi_skip_discard); - kmem_free(free); + kmem_cache_free(xfs_bmap_free_item_zone, free); return error; } @@ -502,7 +502,7 @@ xfs_extent_free_cancel_item( struct xfs_extent_free_item *free; free = container_of(item, struct xfs_extent_free_item, xefi_list); - kmem_free(free); + kmem_cache_free(xfs_bmap_free_item_zone, free); } const struct xfs_defer_op_type xfs_extent_free_defer_type = { @@ -564,7 +564,7 @@ xfs_agfl_free_finish_item( extp->ext_len = free->xefi_blockcount; efdp->efd_next_extent++; - kmem_free(free); + kmem_cache_free(xfs_bmap_free_item_zone, free); return error; } From 0e84e17c16a3c720f2492cb8c4d85a32d1c5511e Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 28 Jun 2022 11:39:46 -0700 Subject: [PATCH 0141/1056] xfs: punch out data fork delalloc blocks on COW writeback failure [ Upstream commit 5ca5916b6bc93577c360c06cb7cdf71adb9b5faf ] If writeback I/O to a COW extent fails, the COW fork blocks are punched out and the data fork blocks left alone. It is possible for COW fork blocks to overlap non-shared data fork blocks (due to cowextsz hint prealloc), however, and writeback unconditionally maps to the COW fork whenever blocks exist at the corresponding offset of the page undergoing writeback. This means it's quite possible for a COW fork extent to overlap delalloc data fork blocks, writeback to convert and map to the COW fork blocks, writeback to fail, and finally for ioend completion to cancel the COW fork blocks and leave stale data fork delalloc blocks around in the inode. The blocks are effectively stale because writeback failure also discards dirty page state. If this occurs, it is likely to trigger assert failures, free space accounting corruption and failures in unrelated file operations. For example, a subsequent reflink attempt of the affected file to a new target file will trip over the stale delalloc in the source file and fail. Several of these issues are occasionally reproduced by generic/648, but are reproducible on demand with the right sequence of operations and timely I/O error injection. To fix this problem, update the ioend failure path to also punch out underlying data fork delalloc blocks on I/O error. This is analogous to the writeback submission failure path in xfs_discard_page() where we might fail to map data fork delalloc blocks and consistent with the successful COW writeback completion path, which is responsible for unmapping from the data fork and remapping in COW fork blocks. Fixes: 787eb485509f ("xfs: fix and streamline error handling in xfs_end_io") Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_aops.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 34fc6148032a3f..c8c15c3c314714 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -82,6 +82,7 @@ xfs_end_ioend( struct iomap_ioend *ioend) { struct xfs_inode *ip = XFS_I(ioend->io_inode); + struct xfs_mount *mp = ip->i_mount; xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; unsigned int nofs_flag; @@ -97,18 +98,26 @@ xfs_end_ioend( /* * Just clean up the in-memory structures if the fs has been shut down. */ - if (xfs_is_shutdown(ip->i_mount)) { + if (xfs_is_shutdown(mp)) { error = -EIO; goto done; } /* - * Clean up any COW blocks on an I/O error. + * Clean up all COW blocks and underlying data fork delalloc blocks on + * I/O error. The delalloc punch is required because this ioend was + * mapped to blocks in the COW fork and the associated pages are no + * longer dirty. If we don't remove delalloc blocks here, they become + * stale and can corrupt free space accounting on unmount. */ error = blk_status_to_errno(ioend->io_bio->bi_status); if (unlikely(error)) { - if (ioend->io_flags & IOMAP_F_SHARED) + if (ioend->io_flags & IOMAP_F_SHARED) { xfs_reflink_cancel_cow_range(ip, offset, size, true); + xfs_bmap_punch_delalloc_range(ip, + XFS_B_TO_FSBT(mp, offset), + XFS_B_TO_FSB(mp, size)); + } goto done; } From 40de647b2bab58745a396b4f1bb29287ae5ec7f5 Mon Sep 17 00:00:00 2001 From: Yang Xu Date: Tue, 28 Jun 2022 11:39:47 -0700 Subject: [PATCH 0142/1056] xfs: Fix the free logic of state in xfs_attr_node_hasname [ Upstream commit a1de97fe296c52eafc6590a3506f4bbd44ecb19a ] When testing xfstests xfs/126 on lastest upstream kernel, it will hang on some machine. Adding a getxattr operation after xattr corrupted, I can reproduce it 100%. The deadlock as below: [983.923403] task:setfattr state:D stack: 0 pid:17639 ppid: 14687 flags:0x00000080 [ 983.923405] Call Trace: [ 983.923410] __schedule+0x2c4/0x700 [ 983.923412] schedule+0x37/0xa0 [ 983.923414] schedule_timeout+0x274/0x300 [ 983.923416] __down+0x9b/0xf0 [ 983.923451] ? xfs_buf_find.isra.29+0x3c8/0x5f0 [xfs] [ 983.923453] down+0x3b/0x50 [ 983.923471] xfs_buf_lock+0x33/0xf0 [xfs] [ 983.923490] xfs_buf_find.isra.29+0x3c8/0x5f0 [xfs] [ 983.923508] xfs_buf_get_map+0x4c/0x320 [xfs] [ 983.923525] xfs_buf_read_map+0x53/0x310 [xfs] [ 983.923541] ? xfs_da_read_buf+0xcf/0x120 [xfs] [ 983.923560] xfs_trans_read_buf_map+0x1cf/0x360 [xfs] [ 983.923575] ? xfs_da_read_buf+0xcf/0x120 [xfs] [ 983.923590] xfs_da_read_buf+0xcf/0x120 [xfs] [ 983.923606] xfs_da3_node_read+0x1f/0x40 [xfs] [ 983.923621] xfs_da3_node_lookup_int+0x69/0x4a0 [xfs] [ 983.923624] ? kmem_cache_alloc+0x12e/0x270 [ 983.923637] xfs_attr_node_hasname+0x6e/0xa0 [xfs] [ 983.923651] xfs_has_attr+0x6e/0xd0 [xfs] [ 983.923664] xfs_attr_set+0x273/0x320 [xfs] [ 983.923683] xfs_xattr_set+0x87/0xd0 [xfs] [ 983.923686] __vfs_removexattr+0x4d/0x60 [ 983.923688] __vfs_removexattr_locked+0xac/0x130 [ 983.923689] vfs_removexattr+0x4e/0xf0 [ 983.923690] removexattr+0x4d/0x80 [ 983.923693] ? __check_object_size+0xa8/0x16b [ 983.923695] ? strncpy_from_user+0x47/0x1a0 [ 983.923696] ? getname_flags+0x6a/0x1e0 [ 983.923697] ? _cond_resched+0x15/0x30 [ 983.923699] ? __sb_start_write+0x1e/0x70 [ 983.923700] ? mnt_want_write+0x28/0x50 [ 983.923701] path_removexattr+0x9b/0xb0 [ 983.923702] __x64_sys_removexattr+0x17/0x20 [ 983.923704] do_syscall_64+0x5b/0x1a0 [ 983.923705] entry_SYSCALL_64_after_hwframe+0x65/0xca [ 983.923707] RIP: 0033:0x7f080f10ee1b When getxattr calls xfs_attr_node_get function, xfs_da3_node_lookup_int fails with EFSCORRUPTED in xfs_attr_node_hasname because we have use blocktrash to random it in xfs/126. So it free state in internal and xfs_attr_node_get doesn't do xfs_buf_trans release job. Then subsequent removexattr will hang because of it. This bug was introduced by kernel commit 07120f1abdff ("xfs: Add xfs_has_attr and subroutines"). It adds xfs_attr_node_hasname helper and said caller will be responsible for freeing the state in this case. But xfs_attr_node_hasname will free state itself instead of caller if xfs_da3_node_lookup_int fails. Fix this bug by moving the step of free state into caller. Also, use "goto error/out" instead of returning error directly in xfs_attr_node_addname_find_attr and xfs_attr_node_removename_setup function because we should free state ourselves. Fixes: 07120f1abdff ("xfs: Add xfs_has_attr and subroutines") Signed-off-by: Yang Xu Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_attr.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index fbc9d816882ce6..23523b802539e1 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -1077,21 +1077,18 @@ xfs_attr_node_hasname( state = xfs_da_state_alloc(args); if (statep != NULL) - *statep = NULL; + *statep = state; /* * Search to see if name exists, and get back a pointer to it. */ error = xfs_da3_node_lookup_int(state, &retval); - if (error) { - xfs_da_state_free(state); - return error; - } + if (error) + retval = error; - if (statep != NULL) - *statep = state; - else + if (!statep) xfs_da_state_free(state); + return retval; } @@ -1112,7 +1109,7 @@ xfs_attr_node_addname_find_attr( */ retval = xfs_attr_node_hasname(args, &dac->da_state); if (retval != -ENOATTR && retval != -EEXIST) - return retval; + goto error; if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE)) goto error; @@ -1337,7 +1334,7 @@ int xfs_attr_node_removename_setup( error = xfs_attr_node_hasname(args, state); if (error != -EEXIST) - return error; + goto out; error = 0; ASSERT((*state)->path.blk[(*state)->path.active - 1].bp != NULL); From c4f376ba8be836d7aea18c954514470cacfeab06 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 28 Jun 2022 11:39:48 -0700 Subject: [PATCH 0143/1056] xfs: remove all COW fork extents when remounting readonly [ Upstream commit 089558bc7ba785c03815a49c89e28ad9b8de51f9 ] As part of multiple customer escalations due to file data corruption after copy on write operations, I wrote some fstests that use fsstress to hammer on COW to shake things loose. Regrettably, I caught some filesystem shutdowns due to incorrect rmap operations with the following loop: mount # (0) fsstress & # (1) while true; do fsstress mount -o remount,ro # (2) fsstress mount -o remount,rw # (3) done When (2) happens, notice that (1) is still running. xfs_remount_ro will call xfs_blockgc_stop to walk the inode cache to free all the COW extents, but the blockgc mechanism races with (1)'s reader threads to take IOLOCKs and loses, which means that it doesn't clean them all out. Call such a file (A). When (3) happens, xfs_remount_rw calls xfs_reflink_recover_cow, which walks the ondisk refcount btree and frees any COW extent that it finds. This function does not check the inode cache, which means that incore COW forks of inode (A) is now inconsistent with the ondisk metadata. If one of those former COW extents are allocated and mapped into another file (B) and someone triggers a COW to the stale reservation in (A), A's dirty data will be written into (B) and once that's done, those blocks will be transferred to (A)'s data fork without bumping the refcount. The results are catastrophic -- file (B) and the refcount btree are now corrupt. Solve this race by forcing the xfs_blockgc_free_space to run synchronously, which causes xfs_icwalk to return to inodes that were skipped because the blockgc code couldn't take the IOLOCK. This is safe to do here because the VFS has already prohibited new writer threads. Fixes: 10ddf64e420f ("xfs: remove leftover CoW reservations when remounting ro") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_super.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 170fee98c45c78..23673703618aba 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1768,7 +1768,10 @@ static int xfs_remount_ro( struct xfs_mount *mp) { - int error; + struct xfs_icwalk icw = { + .icw_flags = XFS_ICWALK_FLAG_SYNC, + }; + int error; /* * Cancel background eofb scanning so it cannot race with the final @@ -1776,8 +1779,13 @@ xfs_remount_ro( */ xfs_blockgc_stop(mp); - /* Get rid of any leftover CoW reservations... */ - error = xfs_blockgc_free_space(mp, NULL); + /* + * Clear out all remaining COW staging extents and speculative post-EOF + * preallocations so that we don't leave inodes requiring inactivation + * cleanups during reclaim on a read-only mount. We must process every + * cached inode, so this requires a synchronous cache scan. + */ + error = xfs_blockgc_free_space(mp, &icw); if (error) { xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); return error; From 4f0c91ab4c7d7608707bab4ffecd6034a5d926a1 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 28 Jun 2022 11:39:49 -0700 Subject: [PATCH 0144/1056] xfs: check sb_meta_uuid for dabuf buffer recovery [ Upstream commit 09654ed8a18cfd45027a67d6cbca45c9ea54feab ] Got a report that a repeated crash test of a container host would eventually fail with a log recovery error preventing the system from mounting the root filesystem. It manifested as a directory leaf node corruption on writeback like so: XFS (loop0): Mounting V5 Filesystem XFS (loop0): Starting recovery (logdev: internal) XFS (loop0): Metadata corruption detected at xfs_dir3_leaf_check_int+0x99/0xf0, xfs_dir3_leaf1 block 0x12faa158 XFS (loop0): Unmount and run xfs_repair XFS (loop0): First 128 bytes of corrupted metadata buffer: 00000000: 00 00 00 00 00 00 00 00 3d f1 00 00 e1 9e d5 8b ........=....... 00000010: 00 00 00 00 12 fa a1 58 00 00 00 29 00 00 1b cc .......X...).... 00000020: 91 06 78 ff f7 7e 4a 7d 8d 53 86 f2 ac 47 a8 23 ..x..~J}.S...G.# 00000030: 00 00 00 00 17 e0 00 80 00 43 00 00 00 00 00 00 .........C...... 00000040: 00 00 00 2e 00 00 00 08 00 00 17 2e 00 00 00 0a ................ 00000050: 02 35 79 83 00 00 00 30 04 d3 b4 80 00 00 01 50 .5y....0.......P 00000060: 08 40 95 7f 00 00 02 98 08 41 fe b7 00 00 02 d4 .@.......A...... 00000070: 0d 62 ef a7 00 00 01 f2 14 50 21 41 00 00 00 0c .b.......P!A.... XFS (loop0): Corruption of in-memory data (0x8) detected at xfs_do_force_shutdown+0x1a/0x20 (fs/xfs/xfs_buf.c:1514). Shutting down. XFS (loop0): Please unmount the filesystem and rectify the problem(s) XFS (loop0): log mount/recovery failed: error -117 XFS (loop0): log mount failed Tracing indicated that we were recovering changes from a transaction at LSN 0x29/0x1c16 into a buffer that had an LSN of 0x29/0x1d57. That is, log recovery was overwriting a buffer with newer changes on disk than was in the transaction. Tracing indicated that we were hitting the "recovery immediately" case in xfs_buf_log_recovery_lsn(), and hence it was ignoring the LSN in the buffer. The code was extracting the LSN correctly, then ignoring it because the UUID in the buffer did not match the superblock UUID. The problem arises because the UUID check uses the wrong UUID - it should be checking the sb_meta_uuid, not sb_uuid. This filesystem has sb_uuid != sb_meta_uuid (which is fine), and the buffer has the correct matching sb_meta_uuid in it, it's just the code checked it against the wrong superblock uuid. The is no corruption in the filesystem, and failing to recover the buffer due to a write verifier failure means the recovery bug did not propagate the corruption to disk. Hence there is no corruption before or after this bug has manifested, the impact is limited simply to an unmountable filesystem.... This was missed back in 2015 during an audit of incorrect sb_uuid usage that resulted in commit fcfbe2c4ef42 ("xfs: log recovery needs to validate against sb_meta_uuid") that fixed the magic32 buffers to validate against sb_meta_uuid instead of sb_uuid. It missed the magicda buffers.... Fixes: ce748eaa65f2 ("xfs: create new metadata UUID field and incompat flag") Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf_item_recover.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index a476c7ef5d5337..991fbf1eb56402 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -816,7 +816,7 @@ xlog_recover_get_buf_lsn( } if (lsn != (xfs_lsn_t)-1) { - if (!uuid_equal(&mp->m_sb.sb_uuid, uuid)) + if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid)) goto recover_immediately; return lsn; } From 3465b167831e9688efa9b5dba44070661b142e22 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 28 Jun 2022 11:39:50 -0700 Subject: [PATCH 0145/1056] xfs: prevent UAF in xfs_log_item_in_current_chkpt [ Upstream commit f8d92a66e810acbef6ddbc0bd0cbd9b117ce8acd ] While I was running with KASAN and lockdep enabled, I stumbled upon an KASAN report about a UAF to a freed CIL checkpoint. Looking at the comment for xfs_log_item_in_current_chkpt, it seems pretty obvious to me that the original patch to xfs_defer_finish_noroll should have done something to lock the CIL to prevent it from switching the CIL contexts while the predicate runs. For upper level code that needs to know if a given log item is new enough not to need relogging, add a new wrapper that takes the CIL context lock long enough to sample the current CIL context. This is kind of racy in that the CIL can switch the contexts immediately after sampling, but that's ok because the consequence is that the defer ops code is a little slow to relog items. ================================================================== BUG: KASAN: use-after-free in xfs_log_item_in_current_chkpt+0x139/0x160 [xfs] Read of size 8 at addr ffff88804ea5f608 by task fsstress/527999 CPU: 1 PID: 527999 Comm: fsstress Tainted: G D 5.16.0-rc4-xfsx #rc4 Call Trace: dump_stack_lvl+0x45/0x59 print_address_description.constprop.0+0x1f/0x140 kasan_report.cold+0x83/0xdf xfs_log_item_in_current_chkpt+0x139/0x160 xfs_defer_finish_noroll+0x3bb/0x1e30 __xfs_trans_commit+0x6c8/0xcf0 xfs_reflink_remap_extent+0x66f/0x10e0 xfs_reflink_remap_blocks+0x2dd/0xa90 xfs_file_remap_range+0x27b/0xc30 vfs_dedupe_file_range_one+0x368/0x420 vfs_dedupe_file_range+0x37c/0x5d0 do_vfs_ioctl+0x308/0x1260 __x64_sys_ioctl+0xa1/0x170 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f2c71a2950b Code: 0f 1e fa 48 8b 05 85 39 0d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 55 39 0d 00 f7 d8 64 89 01 48 RSP: 002b:00007ffe8c0e03c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00005600862a8740 RCX: 00007f2c71a2950b RDX: 00005600862a7be0 RSI: 00000000c0189436 RDI: 0000000000000004 RBP: 000000000000000b R08: 0000000000000027 R09: 0000000000000003 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000005a R13: 00005600862804a8 R14: 0000000000016000 R15: 00005600862a8a20 Allocated by task 464064: kasan_save_stack+0x1e/0x50 __kasan_kmalloc+0x81/0xa0 kmem_alloc+0xcd/0x2c0 [xfs] xlog_cil_ctx_alloc+0x17/0x1e0 [xfs] xlog_cil_push_work+0x141/0x13d0 [xfs] process_one_work+0x7f6/0x1380 worker_thread+0x59d/0x1040 kthread+0x3b0/0x490 ret_from_fork+0x1f/0x30 Freed by task 51: kasan_save_stack+0x1e/0x50 kasan_set_track+0x21/0x30 kasan_set_free_info+0x20/0x30 __kasan_slab_free+0xed/0x130 slab_free_freelist_hook+0x7f/0x160 kfree+0xde/0x340 xlog_cil_committed+0xbfd/0xfe0 [xfs] xlog_cil_process_committed+0x103/0x1c0 [xfs] xlog_state_do_callback+0x45d/0xbd0 [xfs] xlog_ioend_work+0x116/0x1c0 [xfs] process_one_work+0x7f6/0x1380 worker_thread+0x59d/0x1040 kthread+0x3b0/0x490 ret_from_fork+0x1f/0x30 Last potentially related work creation: kasan_save_stack+0x1e/0x50 __kasan_record_aux_stack+0xb7/0xc0 insert_work+0x48/0x2e0 __queue_work+0x4e7/0xda0 queue_work_on+0x69/0x80 xlog_cil_push_now.isra.0+0x16b/0x210 [xfs] xlog_cil_force_seq+0x1b7/0x850 [xfs] xfs_log_force_seq+0x1c7/0x670 [xfs] xfs_file_fsync+0x7c1/0xa60 [xfs] __x64_sys_fsync+0x52/0x80 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff88804ea5f600 which belongs to the cache kmalloc-256 of size 256 The buggy address is located 8 bytes inside of 256-byte region [ffff88804ea5f600, ffff88804ea5f700) The buggy address belongs to the page: page:ffffea00013a9780 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88804ea5ea00 pfn:0x4ea5e head:ffffea00013a9780 order:1 compound_mapcount:0 flags: 0x4fff80000010200(slab|head|node=1|zone=1|lastcpupid=0xfff) raw: 04fff80000010200 ffffea0001245908 ffffea00011bd388 ffff888004c42b40 raw: ffff88804ea5ea00 0000000000100009 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88804ea5f500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88804ea5f580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88804ea5f600: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88804ea5f680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88804ea5f700: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Fixes: 4e919af7827a ("xfs: periodically relog deferred intent items") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log_cil.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 6c93c8ada6f35f..b59cc9c0961ce5 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -1442,9 +1442,9 @@ xlog_cil_force_seq( */ bool xfs_log_item_in_current_chkpt( - struct xfs_log_item *lip) + struct xfs_log_item *lip) { - struct xfs_cil_ctx *ctx = lip->li_mountp->m_log->l_cilp->xc_ctx; + struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp; if (list_empty(&lip->li_cil)) return false; @@ -1454,7 +1454,7 @@ xfs_log_item_in_current_chkpt( * first checkpoint it is written to. Hence if it is different to the * current sequence, we're in a new checkpoint. */ - return lip->li_seq == ctx->sequence; + return lip->li_seq == READ_ONCE(cil->xc_current_sequence); } /* From ce6bfe55237e933fa31dad1c85686023ae387249 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 28 Jun 2022 11:39:51 -0700 Subject: [PATCH 0146/1056] xfs: only bother with sync_filesystem during readonly remount [ Upstream commit b97cca3ba9098522e5a1c3388764ead42640c1a5 ] In commit 02b9984d6408, we pushed a sync_filesystem() call from the VFS into xfs_fs_remount. The only time that we ever need to push dirty file data or metadata to disk for a remount is if we're remounting the filesystem read only, so this really could be moved to xfs_remount_ro. Once we've moved the call site, actually check the return value from sync_filesystem. Fixes: 02b9984d6408 ("fs: push sync_filesystem() down to the file system's remount_fs()") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_super.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 23673703618aba..e8d19916ba99d1 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1773,6 +1773,11 @@ xfs_remount_ro( }; int error; + /* Flush all the dirty data to disk. */ + error = sync_filesystem(mp->m_super); + if (error) + return error; + /* * Cancel background eofb scanning so it cannot race with the final * log force+buftarg wait and deadlock the remount. @@ -1851,8 +1856,6 @@ xfs_fs_reconfigure( if (error) return error; - sync_filesystem(mp->m_super); - /* inode32 -> inode64 */ if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) { mp->m_features &= ~XFS_FEAT_SMALL_INUMS; From ab0b6dc5e16ba6a876d141082912e8ed05e2a491 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 16 May 2022 12:44:22 +0530 Subject: [PATCH 0147/1056] powerpc/ftrace: Remove ftrace init tramp once kernel init is complete commit 84ade0a6655bee803d176525ef457175cbf4df22 upstream. Stop using the ftrace trampoline for init section once kernel init is complete. Fixes: 67361cf8071286 ("powerpc/ftrace: Handle large kernel configs") Cc: stable@vger.kernel.org # v4.20+ Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220516071422.463738-1-naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/ftrace.h | 4 +++- arch/powerpc/kernel/trace/ftrace.c | 15 ++++++++++++--- arch/powerpc/mm/mem.c | 2 ++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index debe8c4f706260..02d32d6422cd8b 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -96,7 +96,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name #endif /* PPC64_ELF_ABI_v1 */ #endif /* CONFIG_FTRACE_SYSCALLS */ -#ifdef CONFIG_PPC64 +#if defined(CONFIG_PPC64) && defined(CONFIG_FUNCTION_TRACER) #include static inline void this_cpu_disable_ftrace(void) @@ -120,11 +120,13 @@ static inline u8 this_cpu_get_ftrace_enabled(void) return get_paca()->ftrace_enabled; } +void ftrace_free_init_tramp(void); #else /* CONFIG_PPC64 */ static inline void this_cpu_disable_ftrace(void) { } static inline void this_cpu_enable_ftrace(void) { } static inline void this_cpu_set_ftrace_enabled(u8 ftrace_enabled) { } static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; } +static inline void ftrace_free_init_tramp(void) { } #endif /* CONFIG_PPC64 */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index d89c5df4f20620..660040c2d7b543 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -336,9 +336,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) /* Is this a known long jump tramp? */ for (i = 0; i < NUM_FTRACE_TRAMPS; i++) - if (!ftrace_tramps[i]) - break; - else if (ftrace_tramps[i] == tramp) + if (ftrace_tramps[i] == tramp) return 0; /* Is this a known plt tramp? */ @@ -881,6 +879,17 @@ void arch_ftrace_update_code(int command) extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; +void ftrace_free_init_tramp(void) +{ + int i; + + for (i = 0; i < NUM_FTRACE_TRAMPS && ftrace_tramps[i]; i++) + if (ftrace_tramps[i] == (unsigned long)ftrace_tramp_init) { + ftrace_tramps[i] = 0; + return; + } +} + int __init ftrace_dyn_arch_init(void) { int i; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 05b9c3f31456c6..543a044560e935 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -314,6 +315,7 @@ void free_initmem(void) mark_initmem_nx(); init_mem_is_free = true; free_initmem_default(POISON_FREE_INITMEM); + ftrace_free_init_tramp(); } /* From b3679e8b59966cf106d1f24f4467f8d346a4480b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Jun 2022 14:16:09 +0200 Subject: [PATCH 0148/1056] fs: add is_idmapped_mnt() helper commit bb49e9e730c2906a958eee273a7819f401543d6c upstream. Multiple places open-code the same check to determine whether a given mount is idmapped. Introduce a simple helper function that can be used instead. This allows us to get rid of the fragile open-coding. We will later change the check that is used to determine whether a given mount is idmapped. Introducing a helper allows us to do this in a single place instead of doing it for multiple places. Link: https://lore.kernel.org/r/20211123114227.3124056-2-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-2-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-2-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner Signed-off-by: Christian Brauner (Microsoft) Signed-off-by: Greg Kroah-Hartman --- fs/cachefiles/bind.c | 2 +- fs/ecryptfs/main.c | 2 +- fs/namespace.c | 2 +- fs/nfsd/export.c | 2 +- fs/overlayfs/super.c | 2 +- fs/proc_namespace.c | 2 +- include/linux/fs.h | 14 ++++++++++++++ 7 files changed, 20 insertions(+), 6 deletions(-) diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index d463d89f5db8cc..146291be626375 100644 --- a/fs/cachefiles/bind.c +++ b/fs/cachefiles/bind.c @@ -117,7 +117,7 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) root = path.dentry; ret = -EINVAL; - if (mnt_user_ns(path.mnt) != &init_user_ns) { + if (is_idmapped_mnt(path.mnt)) { pr_warn("File cache on idmapped mounts not supported"); goto error_unsupported; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index d66bbd2df191e5..2dd23a82e0de56 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -537,7 +537,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags goto out_free; } - if (mnt_user_ns(path.mnt) != &init_user_ns) { + if (is_idmapped_mnt(path.mnt)) { rc = -EINVAL; printk(KERN_ERR "Mounting on idmapped mounts currently disallowed\n"); goto out_free; diff --git a/fs/namespace.c b/fs/namespace.c index b696543adab848..5b5aa107b9f33c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3936,7 +3936,7 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) * mapping. It makes things simpler and callers can just create * another bind-mount they can idmap if they want to. */ - if (mnt_user_ns(m) != &init_user_ns) + if (is_idmapped_mnt(m)) return -EPERM; /* The underlying filesystem doesn't support idmapped mounts yet. */ diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 9421dae2273748..668c7527b17e81 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -427,7 +427,7 @@ static int check_export(struct path *path, int *flags, unsigned char *uuid) return -EINVAL; } - if (mnt_user_ns(path->mnt) != &init_user_ns) { + if (is_idmapped_mnt(path->mnt)) { dprintk("exp_export: export of idmapped mounts not yet supported.\n"); return -EINVAL; } diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 265181c110ae29..7bb0a47cb6156d 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -873,7 +873,7 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path) pr_err("filesystem on '%s' not supported\n", name); goto out_put; } - if (mnt_user_ns(path->mnt) != &init_user_ns) { + if (is_idmapped_mnt(path->mnt)) { pr_err("idmapped layers are currently not supported\n"); goto out_put; } diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 392ef5162655ba..49650e54d2f882 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -80,7 +80,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, fs_infop->str); } - if (mnt_user_ns(mnt) != &init_user_ns) + if (is_idmapped_mnt(mnt)) seq_puts(m, ",idmapped"); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 56eba723477e31..5720473b94b1f6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2726,6 +2726,20 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file) { return mnt_user_ns(file->f_path.mnt); } + +/** + * is_idmapped_mnt - check whether a mount is mapped + * @mnt: the mount to check + * + * If @mnt has an idmapping attached to it @mnt is mapped. + * + * Return: true if mount is mapped, false if not. + */ +static inline bool is_idmapped_mnt(const struct vfsmount *mnt) +{ + return mnt_user_ns(mnt) != &init_user_ns; +} + extern long vfs_truncate(const struct path *, loff_t); int do_truncate(struct user_namespace *, struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); From 7bc23abcb4142ea3cb81d1772b8c6501bd63f2ad Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Jun 2022 14:16:10 +0200 Subject: [PATCH 0149/1056] fs: move mapping helpers commit a793d79ea3e041081cd7cbd8ee43d0b5e4914a2b upstream. The low-level mapping helpers were so far crammed into fs.h. They are out of place there. The fs.h header should just contain the higher-level mapping helpers that interact directly with vfs objects such as struct super_block or struct inode and not the bare mapping helpers. Similarly, only vfs and specific fs code shall interact with low-level mapping helpers. And so they won't be made accessible automatically through regular {g,u}id helpers. Link: https://lore.kernel.org/r/20211123114227.3124056-3-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-3-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-3-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner Signed-off-by: Christian Brauner (Microsoft) Signed-off-by: Greg Kroah-Hartman --- fs/ksmbd/smbacl.c | 1 + fs/ksmbd/smbacl.h | 1 + fs/open.c | 1 + fs/posix_acl.c | 1 + fs/xfs/xfs_linux.h | 1 + include/linux/fs.h | 91 +----------------------------- include/linux/mnt_idmapping.h | 101 ++++++++++++++++++++++++++++++++++ security/commoncap.c | 1 + 8 files changed, 108 insertions(+), 90 deletions(-) create mode 100644 include/linux/mnt_idmapping.h diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c index 7e57ffdb4ce396..7a6675fe57f72d 100644 --- a/fs/ksmbd/smbacl.c +++ b/fs/ksmbd/smbacl.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "smbacl.h" #include "smb_common.h" diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h index 73e08cad412bd7..eba1ebb9e92ec4 100644 --- a/fs/ksmbd/smbacl.h +++ b/fs/ksmbd/smbacl.h @@ -11,6 +11,7 @@ #include #include #include +#include #include "mgmt/tree_connect.h" diff --git a/fs/open.c b/fs/open.c index e0df1536eb69f9..3d2a95ca6404de 100644 --- a/fs/open.c +++ b/fs/open.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "internal.h" diff --git a/fs/posix_acl.c b/fs/posix_acl.c index f5c25f580dd92f..f7cacce3c3e224 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -23,6 +23,7 @@ #include #include #include +#include static struct posix_acl **acl_by_type(struct inode *inode, int type) { diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index c174262a074e3a..09a8fba84ff99b 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -61,6 +61,7 @@ typedef __u32 xfs_nlink_t; #include #include #include +#include #include #include diff --git a/include/linux/fs.h b/include/linux/fs.h index 5720473b94b1f6..b1d446d71c3ffa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -1626,34 +1627,6 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid); } -/** - * kuid_into_mnt - map a kuid down into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kuid: kuid to be mapped - * - * Return: @kuid mapped according to @mnt_userns. - * If @kuid has no mapping INVALID_UID is returned. - */ -static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns, - kuid_t kuid) -{ - return make_kuid(mnt_userns, __kuid_val(kuid)); -} - -/** - * kgid_into_mnt - map a kgid down into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kgid: kgid to be mapped - * - * Return: @kgid mapped according to @mnt_userns. - * If @kgid has no mapping INVALID_GID is returned. - */ -static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns, - kgid_t kgid) -{ - return make_kgid(mnt_userns, __kgid_val(kgid)); -} - /** * i_uid_into_mnt - map an inode's i_uid down into a mnt_userns * @mnt_userns: user namespace of the mount the inode was found from @@ -1682,68 +1655,6 @@ static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, return kgid_into_mnt(mnt_userns, inode->i_gid); } -/** - * kuid_from_mnt - map a kuid up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kuid: kuid to be mapped - * - * Return: @kuid mapped up according to @mnt_userns. - * If @kuid has no mapping INVALID_UID is returned. - */ -static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns, - kuid_t kuid) -{ - return KUIDT_INIT(from_kuid(mnt_userns, kuid)); -} - -/** - * kgid_from_mnt - map a kgid up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kgid: kgid to be mapped - * - * Return: @kgid mapped up according to @mnt_userns. - * If @kgid has no mapping INVALID_GID is returned. - */ -static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, - kgid_t kgid) -{ - return KGIDT_INIT(from_kgid(mnt_userns, kgid)); -} - -/** - * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * - * Use this helper to initialize a new vfs or filesystem object based on - * the caller's fsuid. A common example is initializing the i_uid field of - * a newly allocated inode triggered by a creation event such as mkdir or - * O_CREAT. Other examples include the allocation of quotas for a specific - * user. - * - * Return: the caller's current fsuid mapped up according to @mnt_userns. - */ -static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) -{ - return kuid_from_mnt(mnt_userns, current_fsuid()); -} - -/** - * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * - * Use this helper to initialize a new vfs or filesystem object based on - * the caller's fsgid. A common example is initializing the i_gid field of - * a newly allocated inode triggered by a creation event such as mkdir or - * O_CREAT. Other examples include the allocation of quotas for a specific - * user. - * - * Return: the caller's current fsgid mapped up according to @mnt_userns. - */ -static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) -{ - return kgid_from_mnt(mnt_userns, current_fsgid()); -} - /** * inode_fsuid_set - initialize inode's i_uid field with callers fsuid * @inode: inode to initialize diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h new file mode 100644 index 00000000000000..47c7811fadfec7 --- /dev/null +++ b/include/linux/mnt_idmapping.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MNT_IDMAPPING_H +#define _LINUX_MNT_IDMAPPING_H + +#include +#include + +struct user_namespace; +extern struct user_namespace init_user_ns; + +/** + * kuid_into_mnt - map a kuid down into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * @kuid: kuid to be mapped + * + * Return: @kuid mapped according to @mnt_userns. + * If @kuid has no mapping INVALID_UID is returned. + */ +static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns, + kuid_t kuid) +{ + return make_kuid(mnt_userns, __kuid_val(kuid)); +} + +/** + * kgid_into_mnt - map a kgid down into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * @kgid: kgid to be mapped + * + * Return: @kgid mapped according to @mnt_userns. + * If @kgid has no mapping INVALID_GID is returned. + */ +static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns, + kgid_t kgid) +{ + return make_kgid(mnt_userns, __kgid_val(kgid)); +} + +/** + * kuid_from_mnt - map a kuid up into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * @kuid: kuid to be mapped + * + * Return: @kuid mapped up according to @mnt_userns. + * If @kuid has no mapping INVALID_UID is returned. + */ +static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns, + kuid_t kuid) +{ + return KUIDT_INIT(from_kuid(mnt_userns, kuid)); +} + +/** + * kgid_from_mnt - map a kgid up into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * @kgid: kgid to be mapped + * + * Return: @kgid mapped up according to @mnt_userns. + * If @kgid has no mapping INVALID_GID is returned. + */ +static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, + kgid_t kgid) +{ + return KGIDT_INIT(from_kgid(mnt_userns, kgid)); +} + +/** + * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * + * Use this helper to initialize a new vfs or filesystem object based on + * the caller's fsuid. A common example is initializing the i_uid field of + * a newly allocated inode triggered by a creation event such as mkdir or + * O_CREAT. Other examples include the allocation of quotas for a specific + * user. + * + * Return: the caller's current fsuid mapped up according to @mnt_userns. + */ +static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) +{ + return kuid_from_mnt(mnt_userns, current_fsuid()); +} + +/** + * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * + * Use this helper to initialize a new vfs or filesystem object based on + * the caller's fsgid. A common example is initializing the i_gid field of + * a newly allocated inode triggered by a creation event such as mkdir or + * O_CREAT. Other examples include the allocation of quotas for a specific + * user. + * + * Return: the caller's current fsgid mapped up according to @mnt_userns. + */ +static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) +{ + return kgid_from_mnt(mnt_userns, current_fsgid()); +} + +#endif /* _LINUX_MNT_IDMAPPING_H */ diff --git a/security/commoncap.c b/security/commoncap.c index 3f810d37b71bde..09479f71ee2eda 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * If a non-root user executes a setuid-root binary in From 3374eb1b0afc9d4b2c35599d240b062488d976b4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Jun 2022 14:16:11 +0200 Subject: [PATCH 0150/1056] fs: tweak fsuidgid_has_mapping() commit 476860b3eb4a50958243158861d5340066df5af2 upstream. If the caller's fs{g,u}id aren't mapped in the mount's idmapping we can return early and skip the check whether the mapped fs{g,u}id also have a mapping in the filesystem's idmapping. If the fs{g,u}id aren't mapped in the mount's idmapping they consequently can't be mapped in the filesystem's idmapping. So there's no point in checking that. Link: https://lore.kernel.org/r/20211123114227.3124056-4-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-4-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-4-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner Signed-off-by: Christian Brauner (Microsoft) Signed-off-by: Greg Kroah-Hartman --- include/linux/fs.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index b1d446d71c3ffa..ffc06557618c68 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1697,10 +1697,18 @@ static inline void inode_fsgid_set(struct inode *inode, static inline bool fsuidgid_has_mapping(struct super_block *sb, struct user_namespace *mnt_userns) { - struct user_namespace *s_user_ns = sb->s_user_ns; + struct user_namespace *fs_userns = sb->s_user_ns; + kuid_t kuid; + kgid_t kgid; - return kuid_has_mapping(s_user_ns, mapped_fsuid(mnt_userns)) && - kgid_has_mapping(s_user_ns, mapped_fsgid(mnt_userns)); + kuid = mapped_fsuid(mnt_userns); + if (!uid_valid(kuid)) + return false; + kgid = mapped_fsgid(mnt_userns); + if (!gid_valid(kgid)) + return false; + return kuid_has_mapping(fs_userns, kuid) && + kgid_has_mapping(fs_userns, kgid); } extern struct timespec64 current_time(struct inode *inode); From b20dcf603b8d0bb24a45c8e6cdd345e3fb3aa3d4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Jun 2022 14:16:12 +0200 Subject: [PATCH 0151/1056] fs: account for filesystem mappings commit 1ac2a4104968e0a60b4b3572216a92aab5c1b025 upstream. Currently we only support idmapped mounts for filesystems mounted without an idmapping. This was a conscious decision mentioned in multiple places (cf. e.g. [1]). As explained at length in [3] it is perfectly fine to extend support for idmapped mounts to filesystem's mounted with an idmapping should the need arise. The need has been there for some time now. Various container projects in userspace need this to run unprivileged and nested unprivileged containers (cf. [2]). Before we can port any filesystem that is mountable with an idmapping to support idmapped mounts we need to first extend the mapping helpers to account for the filesystem's idmapping. This again, is explained at length in our documentation at [3] but I'll give an overview here again. Currently, the low-level mapping helpers implement the remapping algorithms described in [3] in a simplified manner. Because we could rely on the fact that all filesystems supporting idmapped mounts are mounted without an idmapping the translation step from or into the filesystem idmapping could be skipped. In order to support idmapped mounts of filesystem's mountable with an idmapping the translation step we were able to skip before cannot be skipped anymore. A filesystem mounted with an idmapping is very likely to not use an identity mapping and will instead use a non-identity mapping. So the translation step from or into the filesystem's idmapping in the remapping algorithm cannot be skipped for such filesystems. More details with examples can be found in [3]. This patch adds a few new and prepares some already existing low-level mapping helpers to perform the full translation algorithm explained in [3]. The low-level helpers can be written in a way that they only perform the additional translation step when the filesystem is indeed mounted with an idmapping. If the low-level helpers detect that they are not dealing with an idmapped mount they can simply return the relevant k{g,u}id unchanged; no remapping needs to be performed at all. The no_idmapping() helper detects whether the shortcut can be used. If the low-level helpers detected that they are dealing with an idmapped mount but the underlying filesystem is mounted without an idmapping we can rely on the previous shorcut and can continue to skip the translation step from or into the filesystem's idmapping. These checks guarantee that only the minimal amount of work is performed. As before, if idmapped mounts aren't used the low-level helpers are idempotent and no work is performed at all. This patch adds the helpers mapped_k{g,u}id_fs() and mapped_k{g,u}id_user(). Following patches will port all places to replace the old k{g,u}id_into_mnt() and k{g,u}id_from_mnt() with these two new helpers. After the conversion is done k{g,u}id_into_mnt() and k{g,u}id_from_mnt() will be removed. This also concludes the renaming of the mapping helpers we started in [4]. Now, all mapping helpers will started with the "mapped_" prefix making everything nice and consistent. The mapped_k{g,u}id_fs() helpers replace the k{g,u}id_into_mnt() helpers. They are to be used when k{g,u}ids are to be mapped from the vfs, e.g. from from struct inode's i_{g,u}id. Conversely, the mapped_k{g,u}id_user() helpers replace the k{g,u}id_from_mnt() helpers. They are to be used when k{g,u}ids are to be written to disk, e.g. when entering from a system call to change ownership of a file. This patch only introduces the helpers. It doesn't yet convert the relevant places to account for filesystem mounted with an idmapping. [1]: commit 2ca4dcc4909d ("fs/mount_setattr: tighten permission checks") [2]: https://github.com/containers/podman/issues/10374 [3]: Documentations/filesystems/idmappings.rst [4]: commit a65e58e791a1 ("fs: document and rename fsid helpers") Link: https://lore.kernel.org/r/20211123114227.3124056-5-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-5-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-5-brauner@kernel.org Cc: Seth Forshee Cc: Amir Goldstein Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner Signed-off-by: Christian Brauner (Microsoft) Signed-off-by: Greg Kroah-Hartman --- include/linux/fs.h | 4 +- include/linux/mnt_idmapping.h | 193 +++++++++++++++++++++++++++++++++- 2 files changed, 191 insertions(+), 6 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index ffc06557618c68..889f2e9be02854 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1638,7 +1638,7 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return kuid_into_mnt(mnt_userns, inode->i_uid); + return mapped_kuid_fs(mnt_userns, &init_user_ns, inode->i_uid); } /** @@ -1652,7 +1652,7 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return kgid_into_mnt(mnt_userns, inode->i_gid); + return mapped_kgid_fs(mnt_userns, &init_user_ns, inode->i_gid); } /** diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 47c7811fadfec7..60341cd33ccc9c 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -6,6 +6,11 @@ #include struct user_namespace; +/* + * Carries the initial idmapping of 0:0:4294967295 which is an identity + * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is + * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...]. + */ extern struct user_namespace init_user_ns; /** @@ -64,9 +69,189 @@ static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, return KGIDT_INIT(from_kgid(mnt_userns, kgid)); } +/** + * initial_idmapping - check whether this is the initial mapping + * @ns: idmapping to check + * + * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1, + * [...], 1000 to 1000 [...]. + * + * Return: true if this is the initial mapping, false if not. + */ +static inline bool initial_idmapping(const struct user_namespace *ns) +{ + return ns == &init_user_ns; +} + +/** + * no_idmapping - check whether we can skip remapping a kuid/gid + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * + * This function can be used to check whether a remapping between two + * idmappings is required. + * An idmapped mount is a mount that has an idmapping attached to it that + * is different from the filsystem's idmapping and the initial idmapping. + * If the initial mapping is used or the idmapping of the mount and the + * filesystem are identical no remapping is required. + * + * Return: true if remapping can be skipped, false if not. + */ +static inline bool no_idmapping(const struct user_namespace *mnt_userns, + const struct user_namespace *fs_userns) +{ + return initial_idmapping(mnt_userns) || mnt_userns == fs_userns; +} + +/** + * mapped_kuid_fs - map a filesystem kuid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kuid : kuid to be mapped + * + * Take a @kuid and remap it from @fs_userns into @mnt_userns. Use this + * function when preparing a @kuid to be reported to userspace. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kuid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kuid won't change when calling + * from_kuid() so we can simply retrieve the value via __kuid_val() + * directly. + * + * Return: @kuid mapped according to @mnt_userns. + * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is + * returned. + */ +static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kuid_t kuid) +{ + uid_t uid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kuid; + if (initial_idmapping(fs_userns)) + uid = __kuid_val(kuid); + else + uid = from_kuid(fs_userns, kuid); + if (uid == (uid_t)-1) + return INVALID_UID; + return make_kuid(mnt_userns, uid); +} + +/** + * mapped_kgid_fs - map a filesystem kgid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kgid : kgid to be mapped + * + * Take a @kgid and remap it from @fs_userns into @mnt_userns. Use this + * function when preparing a @kgid to be reported to userspace. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kgid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kgid won't change when calling + * from_kgid() so we can simply retrieve the value via __kgid_val() + * directly. + * + * Return: @kgid mapped according to @mnt_userns. + * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is + * returned. + */ +static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kgid_t kgid) +{ + gid_t gid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kgid; + if (initial_idmapping(fs_userns)) + gid = __kgid_val(kgid); + else + gid = from_kgid(fs_userns, kgid); + if (gid == (gid_t)-1) + return INVALID_GID; + return make_kgid(mnt_userns, gid); +} + +/** + * mapped_kuid_user - map a user kuid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kuid : kuid to be mapped + * + * Use the idmapping of @mnt_userns to remap a @kuid into @fs_userns. Use this + * function when preparing a @kuid to be written to disk or inode. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kuid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kuid won't change when calling + * make_kuid() so we can simply retrieve the value via KUIDT_INIT() + * directly. + * + * Return: @kuid mapped according to @mnt_userns. + * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is + * returned. + */ +static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kuid_t kuid) +{ + uid_t uid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kuid; + uid = from_kuid(mnt_userns, kuid); + if (uid == (uid_t)-1) + return INVALID_UID; + if (initial_idmapping(fs_userns)) + return KUIDT_INIT(uid); + return make_kuid(fs_userns, uid); +} + +/** + * mapped_kgid_user - map a user kgid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kgid : kgid to be mapped + * + * Use the idmapping of @mnt_userns to remap a @kgid into @fs_userns. Use this + * function when preparing a @kgid to be written to disk or inode. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kgid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kgid won't change when calling + * make_kgid() so we can simply retrieve the value via KGIDT_INIT() + * directly. + * + * Return: @kgid mapped according to @mnt_userns. + * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is + * returned. + */ +static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kgid_t kgid) +{ + gid_t gid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kgid; + gid = from_kgid(mnt_userns, kgid); + if (gid == (gid_t)-1) + return INVALID_GID; + if (initial_idmapping(fs_userns)) + return KGIDT_INIT(gid); + return make_kgid(fs_userns, gid); +} + /** * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount + * @mnt_userns: the mount's idmapping * * Use this helper to initialize a new vfs or filesystem object based on * the caller's fsuid. A common example is initializing the i_uid field of @@ -78,12 +263,12 @@ static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, */ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) { - return kuid_from_mnt(mnt_userns, current_fsuid()); + return mapped_kuid_user(mnt_userns, &init_user_ns, current_fsuid()); } /** * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount + * @mnt_userns: the mount's idmapping * * Use this helper to initialize a new vfs or filesystem object based on * the caller's fsgid. A common example is initializing the i_gid field of @@ -95,7 +280,7 @@ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) */ static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) { - return kgid_from_mnt(mnt_userns, current_fsgid()); + return mapped_kgid_user(mnt_userns, &init_user_ns, current_fsgid()); } #endif /* _LINUX_MNT_IDMAPPING_H */ From 1c62e0186d947b3b04a78a8f3750c6446c01c839 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Jun 2022 14:16:13 +0200 Subject: [PATCH 0152/1056] docs: update mapping documentation commit 8cc5c54de44c5e8e104d364a627ac4296845fc7f upstream. Now that we implement the full remapping algorithms described in our documentation remove the section about shortcircuting them. Link: https://lore.kernel.org/r/20211123114227.3124056-6-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-6-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-6-brauner@kernel.org Cc: Seth Forshee Cc: Amir Goldstein Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner Signed-off-by: Christian Brauner (Microsoft) Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/idmappings.rst | 72 ------------------------ 1 file changed, 72 deletions(-) diff --git a/Documentation/filesystems/idmappings.rst b/Documentation/filesystems/idmappings.rst index 1229a75ec75dd5..7a879ec3b6bf08 100644 --- a/Documentation/filesystems/idmappings.rst +++ b/Documentation/filesystems/idmappings.rst @@ -952,75 +952,3 @@ The raw userspace id that is put on disk is ``u1000`` so when the user takes their home directory back to their home computer where they are assigned ``u1000`` using the initial idmapping and mount the filesystem with the initial idmapping they will see all those files owned by ``u1000``. - -Shortcircuting --------------- - -Currently, the implementation of idmapped mounts enforces that the filesystem -is mounted with the initial idmapping. The reason is simply that none of the -filesystems that we targeted were mountable with a non-initial idmapping. But -that might change soon enough. As we've seen above, thanks to the properties of -idmappings the translation works for both filesystems mounted with the initial -idmapping and filesystem with non-initial idmappings. - -Based on this current restriction to filesystem mounted with the initial -idmapping two noticeable shortcuts have been taken: - -1. We always stash a reference to the initial user namespace in ``struct - vfsmount``. Idmapped mounts are thus mounts that have a non-initial user - namespace attached to them. - - In order to support idmapped mounts this needs to be changed. Instead of - stashing the initial user namespace the user namespace the filesystem was - mounted with must be stashed. An idmapped mount is then any mount that has - a different user namespace attached then the filesystem was mounted with. - This has no user-visible consequences. - -2. The translation algorithms in ``mapped_fs*id()`` and ``i_*id_into_mnt()`` - are simplified. - - Let's consider ``mapped_fs*id()`` first. This function translates the - caller's kernel id into a kernel id in the filesystem's idmapping via - a mount's idmapping. The full algorithm is:: - - mapped_fsuid(kid): - /* Map the kernel id up into a userspace id in the mount's idmapping. */ - from_kuid(mount-idmapping, kid) = uid - - /* Map the userspace id down into a kernel id in the filesystem's idmapping. */ - make_kuid(filesystem-idmapping, uid) = kuid - - We know that the filesystem is always mounted with the initial idmapping as - we enforce this in ``mount_setattr()``. So this can be shortened to:: - - mapped_fsuid(kid): - /* Map the kernel id up into a userspace id in the mount's idmapping. */ - from_kuid(mount-idmapping, kid) = uid - - /* Map the userspace id down into a kernel id in the filesystem's idmapping. */ - KUIDT_INIT(uid) = kuid - - Similarly, for ``i_*id_into_mnt()`` which translated the filesystem's kernel - id into a mount's kernel id:: - - i_uid_into_mnt(kid): - /* Map the kernel id up into a userspace id in the filesystem's idmapping. */ - from_kuid(filesystem-idmapping, kid) = uid - - /* Map the userspace id down into a kernel id in the mounts's idmapping. */ - make_kuid(mount-idmapping, uid) = kuid - - Again, we know that the filesystem is always mounted with the initial - idmapping as we enforce this in ``mount_setattr()``. So this can be - shortened to:: - - i_uid_into_mnt(kid): - /* Map the kernel id up into a userspace id in the filesystem's idmapping. */ - __kuid_val(kid) = uid - - /* Map the userspace id down into a kernel id in the mounts's idmapping. */ - make_kuid(mount-idmapping, uid) = kuid - -Handling filesystems mounted with non-initial idmappings requires that the -translation functions be converted to their full form. They can still be -shortcircuited on non-idmapped mounts. This has no user-visible consequences. From f895d0ff47be88e950e526e757678ef6392c958f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Jun 2022 14:16:14 +0200 Subject: [PATCH 0153/1056] fs: use low-level mapping helpers commit 4472071331549e911a5abad41aea6e3be855a1a4 upstream. In a few places the vfs needs to interact with bare k{g,u}ids directly instead of struct inode. These are just a few. In previous patches we introduced low-level mapping helpers that are able to support filesystems mounted an idmapping. This patch simply converts the places to use these new helpers. Link: https://lore.kernel.org/r/20211123114227.3124056-7-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-7-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-7-brauner@kernel.org Cc: Seth Forshee Cc: Amir Goldstein Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner Signed-off-by: Christian Brauner (Microsoft) Signed-off-by: Greg Kroah-Hartman --- fs/ksmbd/smbacl.c | 18 ++---------------- fs/ksmbd/smbacl.h | 4 ++-- fs/open.c | 4 ++-- fs/posix_acl.c | 16 ++++++++++------ security/commoncap.c | 13 ++++++++----- 5 files changed, 24 insertions(+), 31 deletions(-) diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c index 7a6675fe57f72d..38f23bf981ac96 100644 --- a/fs/ksmbd/smbacl.c +++ b/fs/ksmbd/smbacl.c @@ -275,14 +275,7 @@ static int sid_to_id(struct user_namespace *user_ns, uid_t id; id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]); - /* - * Translate raw sid into kuid in the server's user - * namespace. - */ - uid = make_kuid(&init_user_ns, id); - - /* If this is an idmapped mount, apply the idmapping. */ - uid = kuid_from_mnt(user_ns, uid); + uid = mapped_kuid_user(user_ns, &init_user_ns, KUIDT_INIT(id)); if (uid_valid(uid)) { fattr->cf_uid = uid; rc = 0; @@ -292,14 +285,7 @@ static int sid_to_id(struct user_namespace *user_ns, gid_t id; id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]); - /* - * Translate raw sid into kgid in the server's user - * namespace. - */ - gid = make_kgid(&init_user_ns, id); - - /* If this is an idmapped mount, apply the idmapping. */ - gid = kgid_from_mnt(user_ns, gid); + gid = mapped_kgid_user(user_ns, &init_user_ns, KGIDT_INIT(id)); if (gid_valid(gid)) { fattr->cf_gid = gid; rc = 0; diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h index eba1ebb9e92ec4..811af330942916 100644 --- a/fs/ksmbd/smbacl.h +++ b/fs/ksmbd/smbacl.h @@ -217,7 +217,7 @@ static inline uid_t posix_acl_uid_translate(struct user_namespace *mnt_userns, kuid_t kuid; /* If this is an idmapped mount, apply the idmapping. */ - kuid = kuid_into_mnt(mnt_userns, pace->e_uid); + kuid = mapped_kuid_fs(mnt_userns, &init_user_ns, pace->e_uid); /* Translate the kuid into a userspace id ksmbd would see. */ return from_kuid(&init_user_ns, kuid); @@ -229,7 +229,7 @@ static inline gid_t posix_acl_gid_translate(struct user_namespace *mnt_userns, kgid_t kgid; /* If this is an idmapped mount, apply the idmapping. */ - kgid = kgid_into_mnt(mnt_userns, pace->e_gid); + kgid = mapped_kgid_fs(mnt_userns, &init_user_ns, pace->e_gid); /* Translate the kgid into a userspace id ksmbd would see. */ return from_kgid(&init_user_ns, kgid); diff --git a/fs/open.c b/fs/open.c index 3d2a95ca6404de..31c47abbddf675 100644 --- a/fs/open.c +++ b/fs/open.c @@ -653,8 +653,8 @@ int chown_common(const struct path *path, uid_t user, gid_t group) gid = make_kgid(current_user_ns(), group); mnt_userns = mnt_user_ns(path->mnt); - uid = kuid_from_mnt(mnt_userns, uid); - gid = kgid_from_mnt(mnt_userns, gid); + uid = mapped_kuid_user(mnt_userns, &init_user_ns, uid); + gid = mapped_kgid_user(mnt_userns, &init_user_ns, gid); retry_deleg: newattrs.ia_valid = ATTR_CTIME; diff --git a/fs/posix_acl.c b/fs/posix_acl.c index f7cacce3c3e224..3d7ee193424a18 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -376,7 +376,9 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, goto check_perm; break; case ACL_USER: - uid = kuid_into_mnt(mnt_userns, pa->e_uid); + uid = mapped_kuid_fs(mnt_userns, + &init_user_ns, + pa->e_uid); if (uid_eq(uid, current_fsuid())) goto mask; break; @@ -389,7 +391,9 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, } break; case ACL_GROUP: - gid = kgid_into_mnt(mnt_userns, pa->e_gid); + gid = mapped_kgid_fs(mnt_userns, + &init_user_ns, + pa->e_gid); if (in_group_p(gid)) { found = 1; if ((pa->e_perm & want) == want) @@ -736,17 +740,17 @@ static void posix_acl_fix_xattr_userns( case ACL_USER: uid = make_kuid(from, le32_to_cpu(entry->e_id)); if (from_user) - uid = kuid_from_mnt(mnt_userns, uid); + uid = mapped_kuid_user(mnt_userns, &init_user_ns, uid); else - uid = kuid_into_mnt(mnt_userns, uid); + uid = mapped_kuid_fs(mnt_userns, &init_user_ns, uid); entry->e_id = cpu_to_le32(from_kuid(to, uid)); break; case ACL_GROUP: gid = make_kgid(from, le32_to_cpu(entry->e_id)); if (from_user) - gid = kgid_from_mnt(mnt_userns, gid); + gid = mapped_kgid_user(mnt_userns, &init_user_ns, gid); else - gid = kgid_into_mnt(mnt_userns, gid); + gid = mapped_kgid_fs(mnt_userns, &init_user_ns, gid); entry->e_id = cpu_to_le32(from_kgid(to, gid)); break; default: diff --git a/security/commoncap.c b/security/commoncap.c index 09479f71ee2eda..d288a62e299967 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -419,7 +419,7 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns, kroot = make_kuid(fs_ns, root); /* If this is an idmapped mount shift the kuid. */ - kroot = kuid_into_mnt(mnt_userns, kroot); + kroot = mapped_kuid_fs(mnt_userns, &init_user_ns, kroot); /* If the root kuid maps to a valid uid in current ns, then return * this as a nscap. */ @@ -489,6 +489,7 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns, * @size: size of @ivalue * @task_ns: user namespace of the caller * @mnt_userns: user namespace of the mount the inode was found from + * @fs_userns: user namespace of the filesystem * * If the inode has been found through an idmapped mount the user namespace of * the vfsmount must be passed through @mnt_userns. This function will then @@ -498,7 +499,8 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns, */ static kuid_t rootid_from_xattr(const void *value, size_t size, struct user_namespace *task_ns, - struct user_namespace *mnt_userns) + struct user_namespace *mnt_userns, + struct user_namespace *fs_userns) { const struct vfs_ns_cap_data *nscap = value; kuid_t rootkid; @@ -508,7 +510,7 @@ static kuid_t rootid_from_xattr(const void *value, size_t size, rootid = le32_to_cpu(nscap->rootid); rootkid = make_kuid(task_ns, rootid); - return kuid_from_mnt(mnt_userns, rootkid); + return mapped_kuid_user(mnt_userns, fs_userns, rootkid); } static bool validheader(size_t size, const struct vfs_cap_data *cap) @@ -559,7 +561,8 @@ int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry, /* user is privileged, just write the v2 */ return size; - rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns); + rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns, + &init_user_ns); if (!uid_valid(rootid)) return -EINVAL; @@ -700,7 +703,7 @@ int get_vfs_caps_from_disk(struct user_namespace *mnt_userns, /* Limit the caps to the mounter of the filesystem * or the more limited uid specified in the xattr. */ - rootkuid = kuid_into_mnt(mnt_userns, rootkuid); + rootkuid = mapped_kuid_fs(mnt_userns, &init_user_ns, rootkuid); if (!rootid_owns_currentns(rootkuid)) return -ENODATA; From 7d0536a8fab719544db98e58ad49b9a07332922d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Jun 2022 14:16:15 +0200 Subject: [PATCH 0154/1056] fs: remove unused low-level mapping helpers commit 02e4079913500f24ceb082d8d87d8665f044b298 upstream. Now that we ported all places to use the new low-level mapping helpers that are able to support filesystems mounted with an idmapping we can remove the old low-level mapping helpers. With the removal of these old helpers we also conclude the renaming of the mapping helpers we started in commit a65e58e791a1 ("fs: document and rename fsid helpers"). Link: https://lore.kernel.org/r/20211123114227.3124056-8-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-8-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-8-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner Signed-off-by: Christian Brauner (Microsoft) Signed-off-by: Greg Kroah-Hartman --- include/linux/mnt_idmapping.h | 56 ----------------------------------- 1 file changed, 56 deletions(-) diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 60341cd33ccc9c..0c6ab3f4c95275 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -13,62 +13,6 @@ struct user_namespace; */ extern struct user_namespace init_user_ns; -/** - * kuid_into_mnt - map a kuid down into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kuid: kuid to be mapped - * - * Return: @kuid mapped according to @mnt_userns. - * If @kuid has no mapping INVALID_UID is returned. - */ -static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns, - kuid_t kuid) -{ - return make_kuid(mnt_userns, __kuid_val(kuid)); -} - -/** - * kgid_into_mnt - map a kgid down into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kgid: kgid to be mapped - * - * Return: @kgid mapped according to @mnt_userns. - * If @kgid has no mapping INVALID_GID is returned. - */ -static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns, - kgid_t kgid) -{ - return make_kgid(mnt_userns, __kgid_val(kgid)); -} - -/** - * kuid_from_mnt - map a kuid up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kuid: kuid to be mapped - * - * Return: @kuid mapped up according to @mnt_userns. - * If @kuid has no mapping INVALID_UID is returned. - */ -static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns, - kuid_t kuid) -{ - return KUIDT_INIT(from_kuid(mnt_userns, kuid)); -} - -/** - * kgid_from_mnt - map a kgid up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kgid: kgid to be mapped - * - * Return: @kgid mapped up according to @mnt_userns. - * If @kgid has no mapping INVALID_GID is returned. - */ -static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, - kgid_t kgid) -{ - return KGIDT_INIT(from_kgid(mnt_userns, kgid)); -} - /** * initial_idmapping - check whether this is the initial mapping * @ns: idmapping to check From 21c6c720be75049913336099c33129089497d7cb Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Jun 2022 14:16:16 +0200 Subject: [PATCH 0155/1056] fs: port higher-level mapping helpers commit 209188ce75d0d357c292f6bb81d712acdd4e7db7 upstream. Enable the mapped_fs{g,u}id() helpers to support filesystems mounted with an idmapping. Apart from core mapping helpers that use mapped_fs{g,u}id() to initialize struct inode's i_{g,u}id fields xfs is the only place that uses these low-level helpers directly. The patch only extends the helpers to be able to take the filesystem idmapping into account. Since we don't actually yet pass the filesystem's idmapping in no functional changes happen. This will happen in a final patch. Link: https://lore.kernel.org/r/20211123114227.3124056-9-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-9-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-9-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner Signed-off-by: Christian Brauner (Microsoft) Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_inode.c | 8 ++++---- fs/xfs/xfs_symlink.c | 4 ++-- include/linux/fs.h | 8 ++++---- include/linux/mnt_idmapping.h | 12 ++++++++---- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a4f6f034fb8131..2477e301fa8284 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -994,8 +994,8 @@ xfs_create( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns), - mapped_fsgid(mnt_userns), prid, + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), + mapped_fsgid(mnt_userns, &init_user_ns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) @@ -1148,8 +1148,8 @@ xfs_create_tmpfile( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns), - mapped_fsgid(mnt_userns), prid, + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), + mapped_fsgid(mnt_userns, &init_user_ns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index fc2c6a40464712..a31d2e5d03214d 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -184,8 +184,8 @@ xfs_symlink( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns), - mapped_fsgid(mnt_userns), prid, + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), + mapped_fsgid(mnt_userns, &init_user_ns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) diff --git a/include/linux/fs.h b/include/linux/fs.h index 889f2e9be02854..f62c1b9cd7cded 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1666,7 +1666,7 @@ static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, static inline void inode_fsuid_set(struct inode *inode, struct user_namespace *mnt_userns) { - inode->i_uid = mapped_fsuid(mnt_userns); + inode->i_uid = mapped_fsuid(mnt_userns, &init_user_ns); } /** @@ -1680,7 +1680,7 @@ static inline void inode_fsuid_set(struct inode *inode, static inline void inode_fsgid_set(struct inode *inode, struct user_namespace *mnt_userns) { - inode->i_gid = mapped_fsgid(mnt_userns); + inode->i_gid = mapped_fsgid(mnt_userns, &init_user_ns); } /** @@ -1701,10 +1701,10 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, kuid_t kuid; kgid_t kgid; - kuid = mapped_fsuid(mnt_userns); + kuid = mapped_fsuid(mnt_userns, &init_user_ns); if (!uid_valid(kuid)) return false; - kgid = mapped_fsgid(mnt_userns); + kgid = mapped_fsgid(mnt_userns, &init_user_ns); if (!gid_valid(kgid)) return false; return kuid_has_mapping(fs_userns, kuid) && diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 0c6ab3f4c95275..ee5a217de2a880 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -196,6 +196,7 @@ static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns, /** * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping * * Use this helper to initialize a new vfs or filesystem object based on * the caller's fsuid. A common example is initializing the i_uid field of @@ -205,14 +206,16 @@ static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns, * * Return: the caller's current fsuid mapped up according to @mnt_userns. */ -static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) +static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns) { - return mapped_kuid_user(mnt_userns, &init_user_ns, current_fsuid()); + return mapped_kuid_user(mnt_userns, fs_userns, current_fsuid()); } /** * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping * * Use this helper to initialize a new vfs or filesystem object based on * the caller's fsgid. A common example is initializing the i_gid field of @@ -222,9 +225,10 @@ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) * * Return: the caller's current fsgid mapped up according to @mnt_userns. */ -static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) +static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns) { - return mapped_kgid_user(mnt_userns, &init_user_ns, current_fsgid()); + return mapped_kgid_user(mnt_userns, fs_userns, current_fsgid()); } #endif /* _LINUX_MNT_IDMAPPING_H */ From 968e66f8ff70285744ac506cd5668223d5eebe41 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Jun 2022 14:16:17 +0200 Subject: [PATCH 0156/1056] fs: add i_user_ns() helper commit a1ec9040a2a9122605ac26e5725c6de019184419 upstream. Since we'll be passing the filesystem's idmapping in even more places in the following patches and we do already dereference struct inode to get to the filesystem's idmapping multiple times add a tiny helper. Link: https://lore.kernel.org/r/20211123114227.3124056-10-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-10-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-10-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner Signed-off-by: Christian Brauner (Microsoft) Signed-off-by: Greg Kroah-Hartman --- include/linux/fs.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index f62c1b9cd7cded..d418dffb168183 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1602,6 +1602,11 @@ struct super_block { struct list_head s_inodes_wb; /* writeback inodes */ } __randomize_layout; +static inline struct user_namespace *i_user_ns(const struct inode *inode) +{ + return inode->i_sb->s_user_ns; +} + /* Helper functions so that in most cases filesystems will * not need to deal directly with kuid_t and kgid_t and can * instead deal with the raw numeric values that are stored @@ -1609,22 +1614,22 @@ struct super_block { */ static inline uid_t i_uid_read(const struct inode *inode) { - return from_kuid(inode->i_sb->s_user_ns, inode->i_uid); + return from_kuid(i_user_ns(inode), inode->i_uid); } static inline gid_t i_gid_read(const struct inode *inode) { - return from_kgid(inode->i_sb->s_user_ns, inode->i_gid); + return from_kgid(i_user_ns(inode), inode->i_gid); } static inline void i_uid_write(struct inode *inode, uid_t uid) { - inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid); + inode->i_uid = make_kuid(i_user_ns(inode), uid); } static inline void i_gid_write(struct inode *inode, gid_t gid) { - inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid); + inode->i_gid = make_kgid(i_user_ns(inode), gid); } /** From 38753e9173a5903e902c856b41fb325762bf5945 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Jun 2022 14:16:18 +0200 Subject: [PATCH 0157/1056] fs: support mapped mounts of mapped filesystems commit bd303368b776eead1c29e6cdda82bde7128b82a7 upstream. In previous patches we added new and modified existing helpers to handle idmapped mounts of filesystems mounted with an idmapping. In this final patch we convert all relevant places in the vfs to actually pass the filesystem's idmapping into these helpers. With this the vfs is in shape to handle idmapped mounts of filesystems mounted with an idmapping. Note that this is just the generic infrastructure. Actually adding support for idmapped mounts to a filesystem mountable with an idmapping is follow-up work. In this patch we extend the definition of an idmapped mount from a mount that that has the initial idmapping attached to it to a mount that has an idmapping attached to it which is not the same as the idmapping the filesystem was mounted with. As before we do not allow the initial idmapping to be attached to a mount. In addition this patch prevents that the idmapping the filesystem was mounted with can be attached to a mount created based on this filesystem. This has multiple reasons and advantages. First, attaching the initial idmapping or the filesystem's idmapping doesn't make much sense as in both cases the values of the i_{g,u}id and other places where k{g,u}ids are used do not change. Second, a user that really wants to do this for whatever reason can just create a separate dedicated identical idmapping to attach to the mount. Third, we can continue to use the initial idmapping as an indicator that a mount is not idmapped allowing us to continue to keep passing the initial idmapping into the mapping helpers to tell them that something isn't an idmapped mount even if the filesystem is mounted with an idmapping. Link: https://lore.kernel.org/r/20211123114227.3124056-11-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-11-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-11-brauner@kernel.org Cc: Seth Forshee Cc: Amir Goldstein Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner Signed-off-by: Christian Brauner (Microsoft) Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 51 +++++++++++++++++++++++++++++++++----------- fs/open.c | 7 +++--- fs/posix_acl.c | 8 +++---- include/linux/fs.h | 17 ++++++++------- security/commoncap.c | 9 ++++---- 5 files changed, 59 insertions(+), 33 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 5b5aa107b9f33c..dc31ad6b370f39 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "pnode.h" #include "internal.h" @@ -561,7 +562,7 @@ static void free_vfsmnt(struct mount *mnt) struct user_namespace *mnt_userns; mnt_userns = mnt_user_ns(&mnt->mnt); - if (mnt_userns != &init_user_ns) + if (!initial_idmapping(mnt_userns)) put_user_ns(mnt_userns); kfree_const(mnt->mnt_devname); #ifdef CONFIG_SMP @@ -965,6 +966,7 @@ static struct mount *skip_mnt_tree(struct mount *p) struct vfsmount *vfs_create_mount(struct fs_context *fc) { struct mount *mnt; + struct user_namespace *fs_userns; if (!fc->root) return ERR_PTR(-EINVAL); @@ -982,6 +984,10 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc) mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; + fs_userns = mnt->mnt.mnt_sb->s_user_ns; + if (!initial_idmapping(fs_userns)) + mnt->mnt.mnt_userns = get_user_ns(fs_userns); + lock_mount_hash(); list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts); unlock_mount_hash(); @@ -1072,7 +1078,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, atomic_inc(&sb->s_active); mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt); - if (mnt->mnt.mnt_userns != &init_user_ns) + if (!initial_idmapping(mnt->mnt.mnt_userns)) mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns); mnt->mnt.mnt_sb = sb; mnt->mnt.mnt_root = dget(root); @@ -3927,10 +3933,18 @@ static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt) static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) { struct vfsmount *m = &mnt->mnt; + struct user_namespace *fs_userns = m->mnt_sb->s_user_ns; if (!kattr->mnt_userns) return 0; + /* + * Creating an idmapped mount with the filesystem wide idmapping + * doesn't make sense so block that. We don't allow mushy semantics. + */ + if (kattr->mnt_userns == fs_userns) + return -EINVAL; + /* * Once a mount has been idmapped we don't allow it to change its * mapping. It makes things simpler and callers can just create @@ -3943,12 +3957,8 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP)) return -EINVAL; - /* Don't yet support filesystem mountable in user namespaces. */ - if (m->mnt_sb->s_user_ns != &init_user_ns) - return -EINVAL; - /* We're not controlling the superblock. */ - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(fs_userns, CAP_SYS_ADMIN)) return -EPERM; /* Mount has already been visible in the filesystem hierarchy. */ @@ -4002,14 +4012,27 @@ static struct mount *mount_setattr_prepare(struct mount_kattr *kattr, static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) { - struct user_namespace *mnt_userns; + struct user_namespace *mnt_userns, *old_mnt_userns; if (!kattr->mnt_userns) return; + /* + * We're the only ones able to change the mount's idmapping. So + * mnt->mnt.mnt_userns is stable and we can retrieve it directly. + */ + old_mnt_userns = mnt->mnt.mnt_userns; + mnt_userns = get_user_ns(kattr->mnt_userns); /* Pairs with smp_load_acquire() in mnt_user_ns(). */ smp_store_release(&mnt->mnt.mnt_userns, mnt_userns); + + /* + * If this is an idmapped filesystem drop the reference we've taken + * in vfs_create_mount() before. + */ + if (!initial_idmapping(old_mnt_userns)) + put_user_ns(old_mnt_userns); } static void mount_setattr_commit(struct mount_kattr *kattr, @@ -4133,13 +4156,15 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, } /* - * The init_user_ns is used to indicate that a vfsmount is not idmapped. - * This is simpler than just having to treat NULL as unmapped. Users - * wanting to idmap a mount to init_user_ns can just use a namespace - * with an identity mapping. + * The initial idmapping cannot be used to create an idmapped + * mount. We use the initial idmapping as an indicator of a mount + * that is not idmapped. It can simply be passed into helpers that + * are aware of idmapped mounts as a convenient shortcut. A user + * can just create a dedicated identity mapping to achieve the same + * result. */ mnt_userns = container_of(ns, struct user_namespace, ns); - if (mnt_userns == &init_user_ns) { + if (initial_idmapping(mnt_userns)) { err = -EPERM; goto out_fput; } diff --git a/fs/open.c b/fs/open.c index 31c47abbddf675..1ba1d2ab2ef0b9 100644 --- a/fs/open.c +++ b/fs/open.c @@ -641,7 +641,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) int chown_common(const struct path *path, uid_t user, gid_t group) { - struct user_namespace *mnt_userns; + struct user_namespace *mnt_userns, *fs_userns; struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; int error; @@ -653,8 +653,9 @@ int chown_common(const struct path *path, uid_t user, gid_t group) gid = make_kgid(current_user_ns(), group); mnt_userns = mnt_user_ns(path->mnt); - uid = mapped_kuid_user(mnt_userns, &init_user_ns, uid); - gid = mapped_kgid_user(mnt_userns, &init_user_ns, gid); + fs_userns = i_user_ns(inode); + uid = mapped_kuid_user(mnt_userns, fs_userns, uid); + gid = mapped_kgid_user(mnt_userns, fs_userns, gid); retry_deleg: newattrs.ia_valid = ATTR_CTIME; diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 3d7ee193424a18..d6c7b620fb8f7c 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -377,8 +377,8 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, break; case ACL_USER: uid = mapped_kuid_fs(mnt_userns, - &init_user_ns, - pa->e_uid); + i_user_ns(inode), + pa->e_uid); if (uid_eq(uid, current_fsuid())) goto mask; break; @@ -392,8 +392,8 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, break; case ACL_GROUP: gid = mapped_kgid_fs(mnt_userns, - &init_user_ns, - pa->e_gid); + i_user_ns(inode), + pa->e_gid); if (in_group_p(gid)) { found = 1; if ((pa->e_perm & want) == want) diff --git a/include/linux/fs.h b/include/linux/fs.h index d418dffb168183..76162f04667055 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1643,7 +1643,7 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return mapped_kuid_fs(mnt_userns, &init_user_ns, inode->i_uid); + return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid); } /** @@ -1657,7 +1657,7 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return mapped_kgid_fs(mnt_userns, &init_user_ns, inode->i_gid); + return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid); } /** @@ -1671,7 +1671,7 @@ static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, static inline void inode_fsuid_set(struct inode *inode, struct user_namespace *mnt_userns) { - inode->i_uid = mapped_fsuid(mnt_userns, &init_user_ns); + inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode)); } /** @@ -1685,7 +1685,7 @@ static inline void inode_fsuid_set(struct inode *inode, static inline void inode_fsgid_set(struct inode *inode, struct user_namespace *mnt_userns) { - inode->i_gid = mapped_fsgid(mnt_userns, &init_user_ns); + inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode)); } /** @@ -1706,10 +1706,10 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, kuid_t kuid; kgid_t kgid; - kuid = mapped_fsuid(mnt_userns, &init_user_ns); + kuid = mapped_fsuid(mnt_userns, fs_userns); if (!uid_valid(kuid)) return false; - kgid = mapped_fsgid(mnt_userns, &init_user_ns); + kgid = mapped_fsgid(mnt_userns, fs_userns); if (!gid_valid(kgid)) return false; return kuid_has_mapping(fs_userns, kuid) && @@ -2655,13 +2655,14 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file) * is_idmapped_mnt - check whether a mount is mapped * @mnt: the mount to check * - * If @mnt has an idmapping attached to it @mnt is mapped. + * If @mnt has an idmapping attached different from the + * filesystem's idmapping then @mnt is mapped. * * Return: true if mount is mapped, false if not. */ static inline bool is_idmapped_mnt(const struct vfsmount *mnt) { - return mnt_user_ns(mnt) != &init_user_ns; + return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns; } extern long vfs_truncate(const struct path *, loff_t); diff --git a/security/commoncap.c b/security/commoncap.c index d288a62e299967..5fc8986c3c77cd 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -419,7 +419,7 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns, kroot = make_kuid(fs_ns, root); /* If this is an idmapped mount shift the kuid. */ - kroot = mapped_kuid_fs(mnt_userns, &init_user_ns, kroot); + kroot = mapped_kuid_fs(mnt_userns, fs_ns, kroot); /* If the root kuid maps to a valid uid in current ns, then return * this as a nscap. */ @@ -556,13 +556,12 @@ int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry, return -EINVAL; if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP)) return -EPERM; - if (size == XATTR_CAPS_SZ_2 && (mnt_userns == &init_user_ns)) + if (size == XATTR_CAPS_SZ_2 && (mnt_userns == fs_ns)) if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP)) /* user is privileged, just write the v2 */ return size; - rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns, - &init_user_ns); + rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns, fs_ns); if (!uid_valid(rootid)) return -EINVAL; @@ -703,7 +702,7 @@ int get_vfs_caps_from_disk(struct user_namespace *mnt_userns, /* Limit the caps to the mounter of the filesystem * or the more limited uid specified in the xattr. */ - rootkuid = mapped_kuid_fs(mnt_userns, &init_user_ns, rootkuid); + rootkuid = mapped_kuid_fs(mnt_userns, fs_ns, rootkuid); if (!rootid_owns_currentns(rootkuid)) return -ENODATA; From dc85bc24fbf13d2ef36d41da970776c4f94b68dd Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Jun 2022 14:16:19 +0200 Subject: [PATCH 0158/1056] fs: fix acl translation commit 705191b03d507744c7e097f78d583621c14988ac upstream. Last cycle we extended the idmapped mounts infrastructure to support idmapped mounts of idmapped filesystems (No such filesystem yet exist.). Since then, the meaning of an idmapped mount is a mount whose idmapping is different from the filesystems idmapping. While doing that work we missed to adapt the acl translation helpers. They still assume that checking for the identity mapping is enough. But they need to use the no_idmapping() helper instead. Note, POSIX ACLs are always translated right at the userspace-kernel boundary using the caller's current idmapping and the initial idmapping. The order depends on whether we're coming from or going to userspace. The filesystem's idmapping doesn't matter at the border. Consequently, if a non-idmapped mount is passed we need to make sure to always pass the initial idmapping as the mount's idmapping and not the filesystem idmapping. Since it's irrelevant here it would yield invalid ids and prevent setting acls for filesystems that are mountable in a userns and support posix acls (tmpfs and fuse). I verified the regression reported in [1] and verified that this patch fixes it. A regression test will be added to xfstests in parallel. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215849 [1] Fixes: bd303368b776 ("fs: support mapped mounts of mapped filesystems") Cc: Seth Forshee Cc: Christoph Hellwig Cc: # 5.15+ Cc: Signed-off-by: Christian Brauner (Microsoft) Signed-off-by: Linus Torvalds Signed-off-by: Christian Brauner (Microsoft) Signed-off-by: Greg Kroah-Hartman --- fs/posix_acl.c | 10 ++++++++++ fs/xattr.c | 6 ++++-- include/linux/posix_acl_xattr.h | 4 ++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/posix_acl.c b/fs/posix_acl.c index d6c7b620fb8f7c..ceb1e3b868577c 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -760,9 +760,14 @@ static void posix_acl_fix_xattr_userns( } void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, + struct inode *inode, void *value, size_t size) { struct user_namespace *user_ns = current_user_ns(); + + /* Leave ids untouched on non-idmapped mounts. */ + if (no_idmapping(mnt_userns, i_user_ns(inode))) + mnt_userns = &init_user_ns; if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns)) return; posix_acl_fix_xattr_userns(&init_user_ns, user_ns, mnt_userns, value, @@ -770,9 +775,14 @@ void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, } void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns, + struct inode *inode, void *value, size_t size) { struct user_namespace *user_ns = current_user_ns(); + + /* Leave ids untouched on non-idmapped mounts. */ + if (no_idmapping(mnt_userns, i_user_ns(inode))) + mnt_userns = &init_user_ns; if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns)) return; posix_acl_fix_xattr_userns(user_ns, &init_user_ns, mnt_userns, value, diff --git a/fs/xattr.c b/fs/xattr.c index 5c8c5175b385c2..998045165916ed 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -569,7 +569,8 @@ setxattr(struct user_namespace *mnt_userns, struct dentry *d, } if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) - posix_acl_fix_xattr_from_user(mnt_userns, kvalue, size); + posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d), + kvalue, size); } error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags); @@ -667,7 +668,8 @@ getxattr(struct user_namespace *mnt_userns, struct dentry *d, if (error > 0) { if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) - posix_acl_fix_xattr_to_user(mnt_userns, kvalue, error); + posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d), + kvalue, error); if (size && copy_to_user(value, kvalue, error)) error = -EFAULT; } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index 060e8d20318144..1766e1de695600 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -34,15 +34,19 @@ posix_acl_xattr_count(size_t size) #ifdef CONFIG_FS_POSIX_ACL void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, + struct inode *inode, void *value, size_t size); void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns, + struct inode *inode, void *value, size_t size); #else static inline void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, + struct inode *inode, void *value, size_t size) { } static inline void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns, + struct inode *inode, void *value, size_t size) { } From e8d4878dcd00239c54b089314c625477c5a20a7f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Jun 2022 14:16:20 +0200 Subject: [PATCH 0159/1056] fs: account for group membership commit 168f912893407a5acb798a4a58613b5f1f98c717 upstream. When calling setattr_prepare() to determine the validity of the attributes the ia_{g,u}id fields contain the value that will be written to inode->i_{g,u}id. This is exactly the same for idmapped and non-idmapped mounts and allows callers to pass in the values they want to see written to inode->i_{g,u}id. When group ownership is changed a caller whose fsuid owns the inode can change the group of the inode to any group they are a member of. When searching through the caller's groups we need to use the gid mapped according to the idmapped mount otherwise we will fail to change ownership for unprivileged users. Consider a caller running with fsuid and fsgid 1000 using an idmapped mount that maps id 65534 to 1000 and 65535 to 1001. Consequently, a file owned by 65534:65535 in the filesystem will be owned by 1000:1001 in the idmapped mount. The caller now requests the gid of the file to be changed to 1000 going through the idmapped mount. In the vfs we will immediately map the requested gid to the value that will need to be written to inode->i_gid and place it in attr->ia_gid. Since this idmapped mount maps 65534 to 1000 we place 65534 in attr->ia_gid. When we check whether the caller is allowed to change group ownership we first validate that their fsuid matches the inode's uid. The inode->i_uid is 65534 which is mapped to uid 1000 in the idmapped mount. Since the caller's fsuid is 1000 we pass the check. We now check whether the caller is allowed to change inode->i_gid to the requested gid by calling in_group_p(). This will compare the passed in gid to the caller's fsgid and search the caller's additional groups. Since we're dealing with an idmapped mount we need to pass in the gid mapped according to the idmapped mount. This is akin to checking whether a caller is privileged over the future group the inode is owned by. And that needs to take the idmapped mount into account. Note, all helpers are nops without idmapped mounts. New regression test sent to xfstests. Link: https://github.com/lxc/lxd/issues/10537 Link: https://lore.kernel.org/r/20220613111517.2186646-1-brauner@kernel.org Fixes: 2f221d6f7b88 ("attr: handle idmapped mounts") Cc: Seth Forshee Cc: Christoph Hellwig Cc: Aleksa Sarai Cc: Al Viro Cc: stable@vger.kernel.org # 5.15+ CC: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) Signed-off-by: Greg Kroah-Hartman --- fs/attr.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/fs/attr.c b/fs/attr.c index 66899b6e9bd861..dbe996b0dedfcf 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -61,9 +61,15 @@ static bool chgrp_ok(struct user_namespace *mnt_userns, const struct inode *inode, kgid_t gid) { kgid_t kgid = i_gid_into_mnt(mnt_userns, inode); - if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)) && - (in_group_p(gid) || gid_eq(gid, inode->i_gid))) - return true; + if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) { + kgid_t mapped_gid; + + if (gid_eq(gid, inode->i_gid)) + return true; + mapped_gid = mapped_kgid_fs(mnt_userns, i_user_ns(inode), gid); + if (in_group_p(mapped_gid)) + return true; + } if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN)) return true; if (gid_eq(kgid, INVALID_GID) && @@ -123,12 +129,20 @@ int setattr_prepare(struct user_namespace *mnt_userns, struct dentry *dentry, /* Make sure a caller can chmod. */ if (ia_valid & ATTR_MODE) { + kgid_t mapped_gid; + if (!inode_owner_or_capable(mnt_userns, inode)) return -EPERM; + + if (ia_valid & ATTR_GID) + mapped_gid = mapped_kgid_fs(mnt_userns, + i_user_ns(inode), attr->ia_gid); + else + mapped_gid = i_gid_into_mnt(mnt_userns, inode); + /* Also check the setgid bit! */ - if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : - i_gid_into_mnt(mnt_userns, inode)) && - !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID)) + if (!in_group_p(mapped_gid) && + !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID)) attr->ia_mode &= ~S_ISGID; } From d52f1c58882477e1ed7dfb99c95fa3fd56936035 Mon Sep 17 00:00:00 2001 From: Guo-Feng Fan Date: Wed, 22 Sep 2021 10:36:36 +0800 Subject: [PATCH 0160/1056] rtw88: 8821c: support RFE type4 wifi NIC commit b789e3fe7047296be0ccdbb7ceb0b58856053572 upstream. RFE type4 is a new NIC which has one RF antenna shares with BT. RFE type4 HW is the same as RFE type2 but attaching antenna to aux antenna connector. RFE type2 attach antenna to main antenna connector. Load the same parameter as RFE type2 when initializing NIC. Signed-off-by: Guo-Feng Fan Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210922023637.9357-1-pkshih@realtek.com Signed-off-by: Meng Tang Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtw88/rtw8821c.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index f405f42d1c1b02..746f6f8967d821 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -304,7 +304,8 @@ static void rtw8821c_set_channel_rf(struct rtw_dev *rtwdev, u8 channel, u8 bw) if (channel <= 14) { if (rtwdev->efuse.rfe_option == 0) rtw8821c_switch_rf_set(rtwdev, SWITCH_TO_WLG); - else if (rtwdev->efuse.rfe_option == 2) + else if (rtwdev->efuse.rfe_option == 2 || + rtwdev->efuse.rfe_option == 4) rtw8821c_switch_rf_set(rtwdev, SWITCH_TO_BTG); rtw_write_rf(rtwdev, RF_PATH_A, RF_LUTDBG, BIT(6), 0x1); rtw_write_rf(rtwdev, RF_PATH_A, 0x64, 0xf, 0xf); @@ -777,6 +778,15 @@ static void rtw8821c_coex_cfg_ant_switch(struct rtw_dev *rtwdev, u8 ctrl_type, if (switch_status == coex_dm->cur_switch_status) return; + if (coex_rfe->wlg_at_btg) { + ctrl_type = COEX_SWITCH_CTRL_BY_BBSW; + + if (coex_rfe->ant_switch_polarity) + pos_type = COEX_SWITCH_TO_WLA; + else + pos_type = COEX_SWITCH_TO_WLG_BT; + } + coex_dm->cur_switch_status = switch_status; if (coex_rfe->ant_switch_diversity && @@ -1502,6 +1512,7 @@ static const struct rtw_intf_phy_para_table phy_para_table_8821c = { static const struct rtw_rfe_def rtw8821c_rfe_defs[] = { [0] = RTW_DEF_RFE(8821c, 0, 0), [2] = RTW_DEF_RFE_EXT(8821c, 0, 0, 2), + [4] = RTW_DEF_RFE_EXT(8821c, 0, 0, 2), }; static struct rtw_hw_reg rtw8821c_dig[] = { From 810962c794178b44c2c29d61d51e2e62d61d0218 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Thu, 6 Jan 2022 20:47:39 -0600 Subject: [PATCH 0161/1056] rtw88: rtw8821c: enable rfe 6 devices commit e109e3617e5d563b431a52e6e2f07f0fc65a93ae upstream. Ping-Ke Shih answered[1] a question for a user about an rtl8821ce device that reported RFE 6, which the driver did not support. Ping-Ke suggested a possible fix, but the user never reported back. A second user discovered the above thread and tested the proposed fix. Accordingly, I am pushing this change, even though I am not the author. [1] https://lore.kernel.org/linux-wireless/3f5e2f6eac344316b5dd518ebfea2f95@realtek.com/ Signed-off-by: Ping-Ke Shih Reported-and-tested-by: masterzorag Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220107024739.20967-1-Larry.Finger@lwfinger.net Signed-off-by: Meng Tang Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtw88/rtw8821c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index 746f6f8967d821..897da3ed2f0299 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -1513,6 +1513,7 @@ static const struct rtw_rfe_def rtw8821c_rfe_defs[] = { [0] = RTW_DEF_RFE(8821c, 0, 0), [2] = RTW_DEF_RFE_EXT(8821c, 0, 0, 2), [4] = RTW_DEF_RFE_EXT(8821c, 0, 0, 2), + [6] = RTW_DEF_RFE(8821c, 0, 0), }; static struct rtw_hw_reg rtw8821c_dig[] = { From 5696f7983d5d0dc070b4c0c07969d528aa553827 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 28 Jun 2022 20:20:15 +0300 Subject: [PATCH 0162/1056] net: mscc: ocelot: allow unregistered IP multicast flooding to CPU Since commit 4cf35a2b627a ("net: mscc: ocelot: fix broken IP multicast flooding") from v5.12, unregistered IP multicast flooding is configurable in the ocelot driver for bridged ports. However, by writing 0 to the PGID_MCIPV4 and PGID_MCIPV6 port masks at initialization time, the CPU port module, for which ocelot_port_set_mcast_flood() is not called, will have unknown IP multicast flooding disabled. This makes it impossible for an application such as smcroute to work properly, since all IP multicast traffic received on a standalone port is treated as unregistered (and dropped). Starting with commit 7569459a52c9 ("net: dsa: manage flooding on the CPU ports"), the limitation above has been lifted, because when standalone ports become IFF_PROMISC or IFF_ALLMULTI, ocelot_port_set_mcast_flood() would be called on the CPU port module, so unregistered multicast is flooded to the CPU on an as-needed basis. But between v5.12 and v5.18, IP multicast flooding to the CPU has remained broken, promiscuous or not. Delete the inexplicable premature optimization of clearing PGID_MCIPV4 and PGID_MCIPV6 as part of the init sequence, and allow unregistered IP multicast to be flooded freely to the CPU port module. Fixes: a556c76adc05 ("net: mscc: Add initial Ocelot switch support") Cc: stable@kernel.org Signed-off-by: Vladimir Oltean Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mscc/ocelot.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index a59300d9e00001..96b1e394a397f7 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -2206,11 +2206,15 @@ int ocelot_init(struct ocelot *ocelot) ocelot_rmw_rix(ocelot, ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)), ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)), ANA_PGID_PGID, PGID_MC); + ocelot_rmw_rix(ocelot, ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)), + ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)), + ANA_PGID_PGID, PGID_MCIPV4); + ocelot_rmw_rix(ocelot, ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)), + ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)), + ANA_PGID_PGID, PGID_MCIPV6); ocelot_rmw_rix(ocelot, ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)), ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)), ANA_PGID_PGID, PGID_BC); - ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_MCIPV4); - ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_MCIPV6); /* Allow manual injection via DEVCPU_QS registers, and byte swap these * registers endianness. From ea512d540a55a75058ee4b78abfd1d499b229d1d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 9 Jun 2022 08:34:35 +0100 Subject: [PATCH 0163/1056] io_uring: fix not locked access to fixed buf table commit 05b538c1765f8d14a71ccf5f85258dcbeaf189f7 upstream. We can look inside the fixed buffer table only while holding ->uring_lock, however in some cases we don't do the right async prep for IORING_OP_{WRITE,READ}_FIXED ending up with NULL req->imu forcing making an io-wq worker to try to resolve the fixed buffer without proper locking. Move req->imu setup into early req init paths, i.e. io_prep_rw(), which is called unconditionally for rw requests and under uring_lock. Fixes: 634d00df5e1cf ("io_uring: add full-fledged dynamic buffers support") Signed-off-by: Pavel Begunkov Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index be21765753533b..a8470a98f84d84 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2932,15 +2932,24 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, kiocb->ki_complete = io_complete_rw; } + /* used for fixed read/write too - just read unconditionally */ + req->buf_index = READ_ONCE(sqe->buf_index); + req->imu = NULL; + if (req->opcode == IORING_OP_READ_FIXED || req->opcode == IORING_OP_WRITE_FIXED) { - req->imu = NULL; + struct io_ring_ctx *ctx = req->ctx; + u16 index; + + if (unlikely(req->buf_index >= ctx->nr_user_bufs)) + return -EFAULT; + index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); + req->imu = ctx->user_bufs[index]; io_req_set_rsrc_node(req); } req->rw.addr = READ_ONCE(sqe->addr); req->rw.len = READ_ONCE(sqe->len); - req->buf_index = READ_ONCE(sqe->buf_index); return 0; } @@ -3066,18 +3075,9 @@ static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) { - struct io_ring_ctx *ctx = req->ctx; - struct io_mapped_ubuf *imu = req->imu; - u16 index, buf_index = req->buf_index; - - if (likely(!imu)) { - if (unlikely(buf_index >= ctx->nr_user_bufs)) - return -EFAULT; - index = array_index_nospec(buf_index, ctx->nr_user_bufs); - imu = READ_ONCE(ctx->user_bufs[index]); - req->imu = imu; - } - return __io_import_fixed(req, rw, iter, imu); + if (WARN_ON_ONCE(!req->imu)) + return -EFAULT; + return __io_import_fixed(req, rw, iter, req->imu); } static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock) From 545aecd229613138d6db54fb2b5221faca10137f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 2 Jul 2022 16:41:19 +0200 Subject: [PATCH 0164/1056] Linux 5.15.52 Link: https://lore.kernel.org/r/20220630133232.926711493@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Shuah Khan Tested-by: Florian Fainelli Tested-by: Guenter Roeck Tested-by: Bagas Sanjaya Tested-by: Linux Kernel Functional Testing Tested-by: Sudip Mukherjee Tested-by: Christian Brauner (Microsoft) Tested-by: Ron Economos Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b3bc9d907bed39..777e0a0eeccd11 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 51 +SUBLEVEL = 52 EXTRAVERSION = NAME = Trick or Treat From 990132bebcc81aa95c1a1ac4422b0364753d9d39 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 20 Jun 2022 18:29:39 -0400 Subject: [PATCH 0165/1056] Revert "drm/amdgpu/display: set vblank_disable_immediate for DC" commit a775e4e4941bf2f326aa36c58f67bd6c96cac717 upstream. This reverts commit 92020e81ddbeac351ea4a19bcf01743f32b9c800. This causes stuttering and timeouts with DMCUB for some users so revert it until we understand why and safely enable it to save power. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1887 Acked-by: Harry Wentland Signed-off-by: Alex Deucher Cc: Nicholas Kazlauskas Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 1 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 4f3c62adccbdef..cc2e0c9cfe0a13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -333,6 +333,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) if (!amdgpu_device_has_dc_support(adev)) { if (!adev->enable_virtual_display) /* Disable vblank IRQs aggressively for power-saving */ + /* XXX: can this be enabled for DC? */ adev_to_drm(adev)->vblank_disable_immediate = true; r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d2aecf7bf66b47..d35a6f6d158eab 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3838,9 +3838,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) } #endif - /* Disable vblank IRQs aggressively for power-saving. */ - adev_to_drm(adev)->vblank_disable_immediate = true; - /* loops over all connectors on the board */ for (i = 0; i < link_cnt; i++) { struct dc_link *link = NULL; From 31c371b61d13dd457fdcc27cd728c04ee09f9cdb Mon Sep 17 00:00:00 2001 From: Ruili Ji Date: Wed, 22 Jun 2022 14:20:22 +0800 Subject: [PATCH 0166/1056] drm/amdgpu: To flush tlb for MMHUB of RAVEN series commit 5cb0e3fb2c54eabfb3f932a1574bff1774946bc0 upstream. amdgpu: [mmhub0] no-retry page fault (src_id:0 ring:40 vmid:8 pasid:32769, for process test_basic pid 3305 thread test_basic pid 3305) amdgpu: in page starting at address 0x00007ff990003000 from IH client 0x12 (VMC) amdgpu: VM_L2_PROTECTION_FAULT_STATUS:0x00840051 amdgpu: Faulty UTCL2 client ID: MP1 (0x0) amdgpu: MORE_FAULTS: 0x1 amdgpu: WALKER_ERROR: 0x0 amdgpu: PERMISSION_FAULTS: 0x5 amdgpu: MAPPING_ERROR: 0x0 amdgpu: RW: 0x1 When memory is allocated by kfd, no one triggers the tlb flush for MMHUB0. There is page fault from MMHUB0. v2:fix indentation v3:change subject and fix indentation Signed-off-by: Ruili Ji Reviewed-by: Philip Yang Reviewed-by: Aaron Liu Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 1d41c2c00623ba..5690cb6d27fed6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -768,7 +768,8 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid, struct amdgpu_device *adev = (struct amdgpu_device *)kgd; bool all_hub = false; - if (adev->family == AMDGPU_FAMILY_AI) + if (adev->family == AMDGPU_FAMILY_AI || + adev->family == AMDGPU_FAMILY_RV) all_hub = true; return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); From 6791b57284f59715602e952f6eef8e6303108cf6 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 19 Jun 2022 22:35:48 +0900 Subject: [PATCH 0167/1056] ksmbd: set the range of bytes to zero without extending file size in FSCTL_ZERO_DATA commit 18e39fb960e6a908ac5230b57e3d0d6c25232368 upstream. generic/091, 263 test failed since commit f66f8b94e7f2 ("cifs: when extending a file with falloc we should make files not-sparse"). FSCTL_ZERO_DATA sets the range of bytes to zero without extending file size. The VFS_FALLOCATE_FL_KEEP_SIZE flag should be used even on non-sparse files. Cc: stable@vger.kernel.org Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/ksmbd/vfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index 835b384b089593..02bafd52cd060c 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1018,7 +1018,9 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len); - return vfs_fallocate(fp->filp, FALLOC_FL_ZERO_RANGE, off, len); + return vfs_fallocate(fp->filp, + FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE, + off, len); } int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, From 9d48194d3e490e62955867a9bad1c32259ac0035 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 19 Jun 2022 22:37:17 +0900 Subject: [PATCH 0168/1056] ksmbd: check invalid FileOffset and BeyondFinalZero in FSCTL_ZERO_DATA commit b5e5f9dfc915ff05b41dff56181e1dae101712bd upstream. FileOffset should not be greater than BeyondFinalZero in FSCTL_ZERO_DATA. And don't call ksmbd_vfs_zero_data() if length is zero. Cc: stable@vger.kernel.org Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/ksmbd/smb2pdu.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 1ed3046dd5b3f1..5fc161b8c3859d 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -7684,7 +7684,7 @@ int smb2_ioctl(struct ksmbd_work *work) { struct file_zero_data_information *zero_data; struct ksmbd_file *fp; - loff_t off, len; + loff_t off, len, bfz; if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) { ksmbd_debug(SMB, @@ -7701,19 +7701,26 @@ int smb2_ioctl(struct ksmbd_work *work) zero_data = (struct file_zero_data_information *)&req->Buffer[0]; - fp = ksmbd_lookup_fd_fast(work, id); - if (!fp) { - ret = -ENOENT; + off = le64_to_cpu(zero_data->FileOffset); + bfz = le64_to_cpu(zero_data->BeyondFinalZero); + if (off > bfz) { + ret = -EINVAL; goto out; } - off = le64_to_cpu(zero_data->FileOffset); - len = le64_to_cpu(zero_data->BeyondFinalZero) - off; + len = bfz - off; + if (len) { + fp = ksmbd_lookup_fd_fast(work, id); + if (!fp) { + ret = -ENOENT; + goto out; + } - ret = ksmbd_vfs_zero_data(work, fp, off, len); - ksmbd_fd_put(work, fp); - if (ret < 0) - goto out; + ret = ksmbd_vfs_zero_data(work, fp, off, len); + ksmbd_fd_put(work, fp); + if (ret < 0) + goto out; + } break; } case FSCTL_QUERY_ALLOCATED_RANGES: From e33f5b5815746c09f7ae57aac920f7df304c32b0 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 25 Jun 2022 13:01:08 +0200 Subject: [PATCH 0169/1056] ksmbd: use vfs_llseek instead of dereferencing NULL commit 067baa9a37b32b95fdeabccde4b0cb6a2cf95f96 upstream. By not checking whether llseek is NULL, this might jump to NULL. Also, it doesn't check FMODE_LSEEK. Fix this by using vfs_llseek(), which always does the right thing. Fixes: f44158485826 ("cifsd: add file operations") Cc: stable@vger.kernel.org Cc: linux-cifs@vger.kernel.org Cc: Ronnie Sahlberg Cc: Hyunchul Lee Cc: Sergey Senozhatsky Reviewed-by: Namjae Jeon Acked-by: Al Viro Signed-off-by: Jason A. Donenfeld Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/ksmbd/vfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index 02bafd52cd060c..ce852a12dea682 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1051,7 +1051,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, *out_count = 0; end = start + length; while (start < end && *out_count < in_count) { - extent_start = f->f_op->llseek(f, start, SEEK_DATA); + extent_start = vfs_llseek(f, start, SEEK_DATA); if (extent_start < 0) { if (extent_start != -ENXIO) ret = (int)extent_start; @@ -1061,7 +1061,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, if (extent_start >= end) break; - extent_end = f->f_op->llseek(f, extent_start, SEEK_HOLE); + extent_end = vfs_llseek(f, extent_start, SEEK_HOLE); if (extent_end < 0) { if (extent_end != -ENXIO) ret = (int)extent_end; From 159f2454ab12af0088d93fe773ff8c98fd452e9c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 23 Jun 2022 14:00:15 +0200 Subject: [PATCH 0170/1056] ipv6: take care of disable_policy when restoring routes commit 3b0dc529f56b5f2328244130683210be98f16f7f upstream. When routes corresponding to addresses are restored by fixup_permanent_addr(), the dst_nopolicy parameter was not set. The typical use case is a user that configures an address on a down interface and then put this interface up. Let's take care of this flag in addrconf_f6i_alloc(), so that every callers benefit ont it. CC: stable@kernel.org CC: David Forster Fixes: df789fe75206 ("ipv6: Provide ipv6 version of "disable_policy" sysctl") Reported-by: Siwar Zitouni Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220623120015.32640-1-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 4 ---- net/ipv6/route.c | 9 ++++++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 07b868c002a33d..89afd7bba33cc3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1111,10 +1111,6 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, goto out; } - if (net->ipv6.devconf_all->disable_policy || - idev->cnf.disable_policy) - f6i->dst_nopolicy = true; - neigh_parms_data_state_setall(idev->nd_parms); ifa->addr = *cfg->pfx; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0ca7c780d97a21..4ca754c360a354 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4590,8 +4590,15 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, } f6i = ip6_route_info_create(&cfg, gfp_flags, NULL); - if (!IS_ERR(f6i)) + if (!IS_ERR(f6i)) { f6i->dst_nocount = true; + + if (!anycast && + (net->ipv6.devconf_all->disable_policy || + idev->cnf.disable_policy)) + f6i->dst_nopolicy = true; + } + return f6i; } From 25fab798784b0312d408b827475bd5a77ff94753 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 28 Jun 2022 12:15:08 +0200 Subject: [PATCH 0171/1056] net: phy: Don't trigger state machine while in suspend commit 1758bde2e4aa5ff188d53e7d9d388bbb7e12eebb upstream. Upon system sleep, mdio_bus_phy_suspend() stops the phy_state_machine(), but subsequent interrupts may retrigger it: They may have been left enabled to facilitate wakeup and are not quiesced until the ->suspend_noirq() phase. Unwanted interrupts may hence occur between mdio_bus_phy_suspend() and dpm_suspend_noirq(), as well as between dpm_resume_noirq() and mdio_bus_phy_resume(). Retriggering the phy_state_machine() through an interrupt is not only undesirable for the reason given in mdio_bus_phy_suspend() (freezing it midway with phydev->lock held), but also because the PHY may be inaccessible after it's suspended: Accesses to USB-attached PHYs are blocked once usb_suspend_both() clears the can_submit flag and PHYs on PCI network cards may become inaccessible upon suspend as well. Amend phy_interrupt() to avoid triggering the state machine if the PHY is suspended. Signal wakeup instead if the attached net_device or its parent has been configured as a wakeup source. (Those conditions are identical to mdio_bus_phy_may_suspend().) Postpone handling of the interrupt until the PHY has resumed. Before stopping the phy_state_machine() in mdio_bus_phy_suspend(), wait for a concurrent phy_interrupt() to run to completion. That is necessary because phy_interrupt() may have checked the PHY's suspend status before the system sleep transition commenced and it may thus retrigger the state machine after it was stopped. Likewise, after re-enabling interrupt handling in mdio_bus_phy_resume(), wait for a concurrent phy_interrupt() to complete to ensure that interrupts which it postponed are properly rerun. The issue was exposed by commit 1ce8b37241ed ("usbnet: smsc95xx: Forward PHY interrupts to PHY driver to avoid polling"), but has existed since forever. Fixes: 541cd3ee00a4 ("phylib: Fix deadlock on resume") Link: https://lore.kernel.org/netdev/a5315a8a-32c2-962f-f696-de9a26d30091@samsung.com/ Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Signed-off-by: Lukas Wunner Acked-by: Rafael J. Wysocki Cc: stable@vger.kernel.org # v2.6.33+ Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/b7f386d04e9b5b0e2738f0125743e30676f309ef.1656410895.git.lukas@wunner.de Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy.c | 23 +++++++++++++++++++++++ drivers/net/phy/phy_device.c | 23 +++++++++++++++++++++++ include/linux/phy.h | 6 ++++++ 3 files changed, 52 insertions(+) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index f122026c468267..2fc851082e7b44 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -972,6 +973,28 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) struct phy_driver *drv = phydev->drv; irqreturn_t ret; + /* Wakeup interrupts may occur during a system sleep transition. + * Postpone handling until the PHY has resumed. + */ + if (IS_ENABLED(CONFIG_PM_SLEEP) && phydev->irq_suspended) { + struct net_device *netdev = phydev->attached_dev; + + if (netdev) { + struct device *parent = netdev->dev.parent; + + if (netdev->wol_enabled) + pm_system_wakeup(); + else if (device_may_wakeup(&netdev->dev)) + pm_wakeup_dev_event(&netdev->dev, 0, true); + else if (parent && device_may_wakeup(parent)) + pm_wakeup_dev_event(parent, 0, true); + } + + phydev->irq_rerun = 1; + disable_irq_nosync(irq); + return IRQ_HANDLED; + } + mutex_lock(&phydev->lock); ret = drv->handle_interrupt(phydev); mutex_unlock(&phydev->lock); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 28f4a383aba726..0d3f8fe8e42ce4 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -277,6 +277,15 @@ static __maybe_unused int mdio_bus_phy_suspend(struct device *dev) if (phydev->mac_managed_pm) return 0; + /* Wakeup interrupts may occur during the system sleep transition when + * the PHY is inaccessible. Set flag to postpone handling until the PHY + * has resumed. Wait for concurrent interrupt handler to complete. + */ + if (phy_interrupt_is_valid(phydev)) { + phydev->irq_suspended = 1; + synchronize_irq(phydev->irq); + } + /* We must stop the state machine manually, otherwise it stops out of * control, possibly with the phydev->lock held. Upon resume, netdev * may call phy routines that try to grab the same lock, and that may @@ -314,6 +323,20 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev) if (ret < 0) return ret; no_resume: + if (phy_interrupt_is_valid(phydev)) { + phydev->irq_suspended = 0; + synchronize_irq(phydev->irq); + + /* Rerun interrupts which were postponed by phy_interrupt() + * because they occurred during the system sleep transition. + */ + if (phydev->irq_rerun) { + phydev->irq_rerun = 0; + enable_irq(phydev->irq); + irq_wake_thread(phydev->irq, phydev); + } + } + if (phydev->attached_dev && phydev->adjust_link) phy_start_machine(phydev); diff --git a/include/linux/phy.h b/include/linux/phy.h index 736e1d1a47c408..946ccec178588e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -536,6 +536,10 @@ struct macsec_ops; * @mdix: Current crossover * @mdix_ctrl: User setting of crossover * @interrupts: Flag interrupts have been enabled + * @irq_suspended: Flag indicating PHY is suspended and therefore interrupt + * handling shall be postponed until PHY has resumed + * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended, + * requiring a rerun of the interrupt handler after resume * @interface: enum phy_interface_t value * @skb: Netlink message for cable diagnostics * @nest: Netlink nest used for cable diagnostics @@ -590,6 +594,8 @@ struct phy_device { /* Interrupts are enabled */ unsigned interrupts:1; + unsigned irq_suspended:1; + unsigned irq_rerun:1; enum phy_state state; From 58caf60ce2175a821aa60b43ad78c218311d86b0 Mon Sep 17 00:00:00 2001 From: Pablo Greco Date: Sat, 25 Jun 2022 09:15:02 -0300 Subject: [PATCH 0172/1056] nvme-pci: add NVME_QUIRK_BOGUS_NID for ADATA XPG SX6000LNP (AKA SPECTRIX S40G) commit 1629de0e0373e04d68e88e6d9d3071fbf70b7ea8 upstream. ADATA XPG SPECTRIX S40G drives report bogus eui64 values that appear to be the same across drives in one system. Quirk them out so they are not marked as "non globally unique" duplicates. Before: [ 2.258919] nvme nvme1: pci function 0000:06:00.0 [ 2.264898] nvme nvme2: pci function 0000:05:00.0 [ 2.323235] nvme nvme1: failed to set APST feature (2) [ 2.326153] nvme nvme2: failed to set APST feature (2) [ 2.333935] nvme nvme1: allocated 64 MiB host memory buffer. [ 2.336492] nvme nvme2: allocated 64 MiB host memory buffer. [ 2.339611] nvme nvme1: 7/0/0 default/read/poll queues [ 2.341805] nvme nvme2: 7/0/0 default/read/poll queues [ 2.346114] nvme1n1: p1 [ 2.347197] nvme nvme2: globally duplicate IDs for nsid 1 After: [ 2.427715] nvme nvme1: pci function 0000:06:00.0 [ 2.427771] nvme nvme2: pci function 0000:05:00.0 [ 2.488154] nvme nvme2: failed to set APST feature (2) [ 2.489895] nvme nvme1: failed to set APST feature (2) [ 2.498773] nvme nvme2: allocated 64 MiB host memory buffer. [ 2.500587] nvme nvme1: allocated 64 MiB host memory buffer. [ 2.504113] nvme nvme2: 7/0/0 default/read/poll queues [ 2.507026] nvme nvme1: 7/0/0 default/read/poll queues [ 2.509467] nvme nvme2: Ignoring bogus Namespace Identifiers [ 2.512804] nvme nvme1: Ignoring bogus Namespace Identifiers [ 2.513698] nvme1n1: p1 Signed-off-by: Pablo Greco Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Cc: Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 58b8461b2b0fb7..5e79dff17f809c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3342,7 +3342,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ - .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN | + NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, From 526b53192d09b7c8efefe36ba1671a3a4108f543 Mon Sep 17 00:00:00 2001 From: Lamarque Vieira Souza Date: Wed, 29 Jun 2022 21:30:53 -0300 Subject: [PATCH 0173/1056] nvme-pci: add NVME_QUIRK_BOGUS_NID for ADATA IM2P33F8ABR1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e1c70d79346356bb1ede3f79436df80917845ab9 upstream. ADATA IM2P33F8ABR1 reports bogus eui64 values that appear to be the same across all drives. Quirk them out so they are not marked as "non globally unique" duplicates. Co-developed-by: Felipe de Jesus Araujo da Conceição Signed-off-by: Felipe de Jesus Araujo da Conceição Signed-off-by: Lamarque V. Souza Signed-off-by: Greg Kroah-Hartman Cc: stable@vger.kernel.org Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5e79dff17f809c..c3db9f12dac3ab 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3341,6 +3341,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x1cc1, 0x33f8), /* ADATA IM2P33F8ABR1 1 TB */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN | NVME_QUIRK_BOGUS_NID, }, From be74e588f1a5db165a31d2c39477b4c818a7351c Mon Sep 17 00:00:00 2001 From: Chris Ye Date: Tue, 31 May 2022 17:09:54 -0700 Subject: [PATCH 0174/1056] nvdimm: Fix badblocks clear off-by-one error commit ef9102004a87cb3f8b26e000a095a261fc0467d3 upstream. nvdimm_clear_badblocks_region() validates badblock clearing requests against the span of the region, however it compares the inclusive badblock request range to the exclusive region range. Fix up the off-by-one error. Fixes: 23f498448362 ("libnvdimm: rework region badblocks clearing") Cc: Signed-off-by: Chris Ye Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/165404219489.2445897.9792886413715690399.stgit@dwillia2-xfh Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 9dc7f3edd42b17..84d197cc09f8d1 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -185,8 +185,8 @@ static int nvdimm_clear_badblocks_region(struct device *dev, void *data) ndr_end = nd_region->ndr_start + nd_region->ndr_size - 1; /* make sure we are in the region */ - if (ctx->phys < nd_region->ndr_start - || (ctx->phys + ctx->cleared) > ndr_end) + if (ctx->phys < nd_region->ndr_start || + (ctx->phys + ctx->cleared - 1) > ndr_end) return 0; sector = (ctx->phys - nd_region->ndr_start) / 512; From 0c1d781d6b08b1b8942bb70421f494057fc6950c Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Fri, 24 Jun 2022 01:17:58 +0000 Subject: [PATCH 0175/1056] powerpc/prom_init: Fix kernel config grep commit 6886da5f49e6d86aad76807a93f3eef5e4f01b10 upstream. When searching for config options, use the KCONFIG_CONFIG shell variable so that builds using non-standard config locations work. Fixes: 26deb04342e3 ("powerpc: prepare string/mem functions for KASAN") Cc: stable@vger.kernel.org # v5.2+ Signed-off-by: Liam R. Howlett Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220624011745.4060795-1-Liam.Howlett@oracle.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/prom_init_check.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index b183ab9c5107c9..dfa5f729f774d6 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -13,7 +13,7 @@ # If you really need to reference something from prom_init.o add # it to the list below: -grep "^CONFIG_KASAN=y$" .config >/dev/null +grep "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} >/dev/null if [ $? -eq 0 ] then MEM_FUNCS="__memcpy __memset" From e646baf1a4fdb29c3b791823f9908f435f837d97 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 23 Jun 2022 10:56:17 +0200 Subject: [PATCH 0176/1056] powerpc/book3e: Fix PUD allocation size in map_kernel_page() commit 986481618023e18e187646b0fff05a3c337531cb upstream. Commit 2fb4706057bc ("powerpc: add support for folded p4d page tables") erroneously changed PUD setup to a mix of PMD and PUD. Fix it. While at it, use PTE_TABLE_SIZE instead of PAGE_SIZE for PTE tables in order to avoid any confusion. Fixes: 2fb4706057bc ("powerpc: add support for folded p4d page tables") Cc: stable@vger.kernel.org # v5.8+ Signed-off-by: Christophe Leroy Acked-by: Mike Rapoport Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/95ddfd6176d53e6c85e13bd1c358359daa56775f.1655974558.git.christophe.leroy@csgroup.eu Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/nohash/book3e_pgtable.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c index 77884e24281dd5..3d845e001c8748 100644 --- a/arch/powerpc/mm/nohash/book3e_pgtable.c +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c @@ -95,8 +95,8 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) pgdp = pgd_offset_k(ea); p4dp = p4d_offset(pgdp, ea); if (p4d_none(*p4dp)) { - pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); - p4d_populate(&init_mm, p4dp, pmdp); + pudp = early_alloc_pgtable(PUD_TABLE_SIZE); + p4d_populate(&init_mm, p4dp, pudp); } pudp = pud_offset(p4dp, ea); if (pud_none(*pudp)) { @@ -105,7 +105,7 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) } pmdp = pmd_offset(pudp, ea); if (!pmd_present(*pmdp)) { - ptep = early_alloc_pgtable(PAGE_SIZE); + ptep = early_alloc_pgtable(PTE_TABLE_SIZE); pmd_populate_kernel(&init_mm, pmdp, ptep); } ptep = pte_offset_kernel(pmdp, ea); From bdb4d98d6d956109b210dede54804ac81c25ec8f Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 28 Jun 2022 00:41:19 +0530 Subject: [PATCH 0177/1056] powerpc/bpf: Fix use of user_pt_regs in uapi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b21bd5a4b130f8370861478d2880985daace5913 upstream. Trying to build a .c file that includes : $ cat test_bpf_headers.c #include throws the below error: /usr/include/linux/bpf_perf_event.h:14:28: error: field ‘regs’ has incomplete type 14 | bpf_user_pt_regs_t regs; | ^~~~ This is because we typedef bpf_user_pt_regs_t to 'struct user_pt_regs' in arch/powerpc/include/uaps/asm/bpf_perf_event.h, but 'struct user_pt_regs' is not exposed to userspace. Powerpc has both pt_regs and user_pt_regs structures. However, unlike arm64 and s390, we expose user_pt_regs to userspace as just 'pt_regs'. As such, we should typedef bpf_user_pt_regs_t to 'struct pt_regs' for userspace. Within the kernel though, we want to typedef bpf_user_pt_regs_t to 'struct user_pt_regs'. Remove arch/powerpc/include/uapi/asm/bpf_perf_event.h so that the uapi/asm-generic version of the header is exposed to userspace. Introduce arch/powerpc/include/asm/bpf_perf_event.h so that we can typedef bpf_user_pt_regs_t to 'struct user_pt_regs' for use within the kernel. Note that this was not showing up with the bpf selftest build since tools/include/uapi/asm/bpf_perf_event.h didn't include the powerpc variant. Fixes: a6460b03f945ee ("powerpc/bpf: Fix broken uapi for BPF_PROG_TYPE_PERF_EVENT") Cc: stable@vger.kernel.org # v4.20+ Signed-off-by: Naveen N. Rao [mpe: Use typical naming for header include guard] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220627191119.142867-1-naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/bpf_perf_event.h | 9 +++++++++ arch/powerpc/include/uapi/asm/bpf_perf_event.h | 9 --------- 2 files changed, 9 insertions(+), 9 deletions(-) create mode 100644 arch/powerpc/include/asm/bpf_perf_event.h delete mode 100644 arch/powerpc/include/uapi/asm/bpf_perf_event.h diff --git a/arch/powerpc/include/asm/bpf_perf_event.h b/arch/powerpc/include/asm/bpf_perf_event.h new file mode 100644 index 00000000000000..e8a7b4ffb58c2e --- /dev/null +++ b/arch/powerpc/include/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BPF_PERF_EVENT_H +#define _ASM_POWERPC_BPF_PERF_EVENT_H + +#include + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _ASM_POWERPC_BPF_PERF_EVENT_H */ diff --git a/arch/powerpc/include/uapi/asm/bpf_perf_event.h b/arch/powerpc/include/uapi/asm/bpf_perf_event.h deleted file mode 100644 index 5e1e648aeec4c8..00000000000000 --- a/arch/powerpc/include/uapi/asm/bpf_perf_event.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ -#define _UAPI__ASM_BPF_PERF_EVENT_H__ - -#include - -typedef struct user_pt_regs bpf_user_pt_regs_t; - -#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ From 6352b2f4d8e95ec0ae576d7705435d64cfa29503 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 28 Jun 2022 00:37:22 +0200 Subject: [PATCH 0178/1056] dm raid: fix accesses beyond end of raid member array commit 332bd0778775d0cf105c4b9e03e460b590749916 upstream. On dm-raid table load (using raid_ctr), dm-raid allocates an array rs->devs[rs->raid_disks] for the raid device members. rs->raid_disks is defined by the number of raid metadata and image tupples passed into the target's constructor. In the case of RAID layout changes being requested, that number can be different from the current number of members for existing raid sets as defined in their superblocks. Example RAID layout changes include: - raid1 legs being added/removed - raid4/5/6/10 number of stripes changed (stripe reshaping) - takeover to higher raid level (e.g. raid5 -> raid6) When accessing array members, rs->raid_disks must be used in control loops instead of the potentially larger value in rs->md.raid_disks. Otherwise it will cause memory access beyond the end of the rs->devs array. Fix this by changing code that is prone to out-of-bounds access. Also fix validate_raid_redundancy() to validate all devices that are added. Also, use braces to help clean up raid_iterate_devices(). The out-of-bounds memory accesses was discovered using KASAN. This commit was verified to pass all LVM2 RAID tests (with KASAN enabled). Cc: stable@vger.kernel.org Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-raid.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index d9ef52159a22bf..b0566aabc18673 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1001,12 +1001,13 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) static int validate_raid_redundancy(struct raid_set *rs) { unsigned int i, rebuild_cnt = 0; - unsigned int rebuilds_per_group = 0, copies; + unsigned int rebuilds_per_group = 0, copies, raid_disks; unsigned int group_size, last_group_start; - for (i = 0; i < rs->md.raid_disks; i++) - if (!test_bit(In_sync, &rs->dev[i].rdev.flags) || - !rs->dev[i].rdev.sb_page) + for (i = 0; i < rs->raid_disks; i++) + if (!test_bit(FirstUse, &rs->dev[i].rdev.flags) && + ((!test_bit(In_sync, &rs->dev[i].rdev.flags) || + !rs->dev[i].rdev.sb_page))) rebuild_cnt++; switch (rs->md.level) { @@ -1046,8 +1047,9 @@ static int validate_raid_redundancy(struct raid_set *rs) * A A B B C * C D D E E */ + raid_disks = min(rs->raid_disks, rs->md.raid_disks); if (__is_raid10_near(rs->md.new_layout)) { - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < raid_disks; i++) { if (!(i % copies)) rebuilds_per_group = 0; if ((!rs->dev[i].rdev.sb_page || @@ -1070,10 +1072,10 @@ static int validate_raid_redundancy(struct raid_set *rs) * results in the need to treat the last (potentially larger) * set differently. */ - group_size = (rs->md.raid_disks / copies); - last_group_start = (rs->md.raid_disks / group_size) - 1; + group_size = (raid_disks / copies); + last_group_start = (raid_disks / group_size) - 1; last_group_start *= group_size; - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < raid_disks; i++) { if (!(i % copies) && !(i > last_group_start)) rebuilds_per_group = 0; if ((!rs->dev[i].rdev.sb_page || @@ -1588,7 +1590,7 @@ static sector_t __rdev_sectors(struct raid_set *rs) { int i; - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < rs->raid_disks; i++) { struct md_rdev *rdev = &rs->dev[i].rdev; if (!test_bit(Journal, &rdev->flags) && @@ -3771,13 +3773,13 @@ static int raid_iterate_devices(struct dm_target *ti, unsigned int i; int r = 0; - for (i = 0; !r && i < rs->md.raid_disks; i++) - if (rs->dev[i].data_dev) - r = fn(ti, - rs->dev[i].data_dev, - 0, /* No offset on data devs */ - rs->md.dev_sectors, - data); + for (i = 0; !r && i < rs->raid_disks; i++) { + if (rs->dev[i].data_dev) { + r = fn(ti, rs->dev[i].data_dev, + 0, /* No offset on data devs */ + rs->md.dev_sectors, data); + } + } return r; } From d5b06039b195d4b6f94f5d345b1e4ac1975a9832 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 29 Jun 2022 13:40:57 -0400 Subject: [PATCH 0179/1056] dm raid: fix KASAN warning in raid5_add_disks commit 617b365872a247480e9dcd50a32c8d1806b21861 upstream. There's a KASAN warning in raid5_add_disk when running the LVM testsuite. The warning happens in the test lvconvert-raid-reshape-linear_to_raid6-single-type.sh. We fix the warning by verifying that rdev->saved_raid_disk is within limits. Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e54d802ee0bb80..b58984ddca13bc 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -8026,6 +8026,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) */ if (rdev->saved_raid_disk >= 0 && rdev->saved_raid_disk >= first && + rdev->saved_raid_disk <= last && conf->disks[rdev->saved_raid_disk].rdev == NULL) first = rdev->saved_raid_disk; From 0222575395bd508a9cd292c9ec4777b773fdb4f1 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 11 Jun 2022 00:20:23 +0200 Subject: [PATCH 0180/1056] s390/archrandom: simplify back to earlier design and initialize earlier commit e4f74400308cb8abde5fdc9cad609c2aba32110c upstream. s390x appears to present two RNG interfaces: - a "TRNG" that gathers entropy using some hardware function; and - a "DRBG" that takes in a seed and expands it. Previously, the TRNG was wired up to arch_get_random_{long,int}(), but it was observed that this was being called really frequently, resulting in high overhead. So it was changed to be wired up to arch_get_random_ seed_{long,int}(), which was a reasonable decision. Later on, the DRBG was then wired up to arch_get_random_{long,int}(), with a complicated buffer filling thread, to control overhead and rate. Fortunately, none of the performance issues matter much now. The RNG always attempts to use arch_get_random_seed_{long,int}() first, which means a complicated implementation of arch_get_random_{long,int}() isn't really valuable or useful to have around. And it's only used when reseeding, which means it won't hit the high throughput complications that were faced before. So this commit returns to an earlier design of just calling the TRNG in arch_get_random_seed_{long,int}(), and returning false in arch_get_ random_{long,int}(). Part of what makes the simplification possible is that the RNG now seeds itself using the TRNG at bootup. But this only works if the TRNG is detected early in boot, before random_init() is called. So this commit also causes that check to happen in setup_arch(). Cc: stable@vger.kernel.org Cc: Harald Freudenberger Cc: Ingo Franzki Cc: Juergen Christ Cc: Heiko Carstens Signed-off-by: Jason A. Donenfeld Link: https://lore.kernel.org/r/20220610222023.378448-1-Jason@zx2c4.com Reviewed-by: Harald Freudenberger Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- arch/s390/crypto/arch_random.c | 217 ----------------------------- arch/s390/include/asm/archrandom.h | 14 +- arch/s390/kernel/setup.c | 5 + 3 files changed, 12 insertions(+), 224 deletions(-) diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 56007c763902ae..1f2d40993c4d2d 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -4,232 +4,15 @@ * * Copyright IBM Corp. 2017, 2020 * Author(s): Harald Freudenberger - * - * The s390_arch_random_generate() function may be called from random.c - * in interrupt context. So this implementation does the best to be very - * fast. There is a buffer of random data which is asynchronously checked - * and filled by a workqueue thread. - * If there are enough bytes in the buffer the s390_arch_random_generate() - * just delivers these bytes. Otherwise false is returned until the - * worker thread refills the buffer. - * The worker fills the rng buffer by pulling fresh entropy from the - * high quality (but slow) true hardware random generator. This entropy - * is then spread over the buffer with an pseudo random generator PRNG. - * As the arch_get_random_seed_long() fetches 8 bytes and the calling - * function add_interrupt_randomness() counts this as 1 bit entropy the - * distribution needs to make sure there is in fact 1 bit entropy contained - * in 8 bytes of the buffer. The current values pull 32 byte entropy - * and scatter this into a 2048 byte buffer. So 8 byte in the buffer - * will contain 1 bit of entropy. - * The worker thread is rescheduled based on the charge level of the - * buffer but at least with 500 ms delay to avoid too much CPU consumption. - * So the max. amount of rng data delivered via arch_get_random_seed is - * limited to 4k bytes per second. */ #include #include #include -#include #include -#include -#include #include DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0); EXPORT_SYMBOL(s390_arch_random_counter); - -#define ARCH_REFILL_TICKS (HZ/2) -#define ARCH_PRNG_SEED_SIZE 32 -#define ARCH_RNG_BUF_SIZE 2048 - -static DEFINE_SPINLOCK(arch_rng_lock); -static u8 *arch_rng_buf; -static unsigned int arch_rng_buf_idx; - -static void arch_rng_refill_buffer(struct work_struct *); -static DECLARE_DELAYED_WORK(arch_rng_work, arch_rng_refill_buffer); - -bool s390_arch_random_generate(u8 *buf, unsigned int nbytes) -{ - /* max hunk is ARCH_RNG_BUF_SIZE */ - if (nbytes > ARCH_RNG_BUF_SIZE) - return false; - - /* lock rng buffer */ - if (!spin_trylock(&arch_rng_lock)) - return false; - - /* try to resolve the requested amount of bytes from the buffer */ - arch_rng_buf_idx -= nbytes; - if (arch_rng_buf_idx < ARCH_RNG_BUF_SIZE) { - memcpy(buf, arch_rng_buf + arch_rng_buf_idx, nbytes); - atomic64_add(nbytes, &s390_arch_random_counter); - spin_unlock(&arch_rng_lock); - return true; - } - - /* not enough bytes in rng buffer, refill is done asynchronously */ - spin_unlock(&arch_rng_lock); - - return false; -} -EXPORT_SYMBOL(s390_arch_random_generate); - -static void arch_rng_refill_buffer(struct work_struct *unused) -{ - unsigned int delay = ARCH_REFILL_TICKS; - - spin_lock(&arch_rng_lock); - if (arch_rng_buf_idx > ARCH_RNG_BUF_SIZE) { - /* buffer is exhausted and needs refill */ - u8 seed[ARCH_PRNG_SEED_SIZE]; - u8 prng_wa[240]; - /* fetch ARCH_PRNG_SEED_SIZE bytes of entropy */ - cpacf_trng(NULL, 0, seed, sizeof(seed)); - /* blow this entropy up to ARCH_RNG_BUF_SIZE with PRNG */ - memset(prng_wa, 0, sizeof(prng_wa)); - cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, - &prng_wa, NULL, 0, seed, sizeof(seed)); - cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, - &prng_wa, arch_rng_buf, ARCH_RNG_BUF_SIZE, NULL, 0); - arch_rng_buf_idx = ARCH_RNG_BUF_SIZE; - } - delay += (ARCH_REFILL_TICKS * arch_rng_buf_idx) / ARCH_RNG_BUF_SIZE; - spin_unlock(&arch_rng_lock); - - /* kick next check */ - queue_delayed_work(system_long_wq, &arch_rng_work, delay); -} - -/* - * Here follows the implementation of s390_arch_get_random_long(). - * - * The random longs to be pulled by arch_get_random_long() are - * prepared in an 4K buffer which is filled from the NIST 800-90 - * compliant s390 drbg. By default the random long buffer is refilled - * 256 times before the drbg itself needs a reseed. The reseed of the - * drbg is done with 32 bytes fetched from the high quality (but slow) - * trng which is assumed to deliver 100% entropy. So the 32 * 8 = 256 - * bits of entropy are spread over 256 * 4KB = 1MB serving 131072 - * arch_get_random_long() invocations before reseeded. - * - * How often the 4K random long buffer is refilled with the drbg - * before the drbg is reseeded can be adjusted. There is a module - * parameter 's390_arch_rnd_long_drbg_reseed' accessible via - * /sys/module/arch_random/parameters/rndlong_drbg_reseed - * or as kernel command line parameter - * arch_random.rndlong_drbg_reseed= - * This parameter tells how often the drbg fills the 4K buffer before - * it is re-seeded by fresh entropy from the trng. - * A value of 16 results in reseeding the drbg at every 16 * 4 KB = 64 - * KB with 32 bytes of fresh entropy pulled from the trng. So a value - * of 16 would result in 256 bits entropy per 64 KB. - * A value of 256 results in 1MB of drbg output before a reseed of the - * drbg is done. So this would spread the 256 bits of entropy among 1MB. - * Setting this parameter to 0 forces the reseed to take place every - * time the 4K buffer is depleted, so the entropy rises to 256 bits - * entropy per 4K or 0.5 bit entropy per arch_get_random_long(). With - * setting this parameter to negative values all this effort is - * disabled, arch_get_random long() returns false and thus indicating - * that the arch_get_random_long() feature is disabled at all. - */ - -static unsigned long rndlong_buf[512]; -static DEFINE_SPINLOCK(rndlong_lock); -static int rndlong_buf_index; - -static int rndlong_drbg_reseed = 256; -module_param_named(rndlong_drbg_reseed, rndlong_drbg_reseed, int, 0600); -MODULE_PARM_DESC(rndlong_drbg_reseed, "s390 arch_get_random_long() drbg reseed"); - -static inline void refill_rndlong_buf(void) -{ - static u8 prng_ws[240]; - static int drbg_counter; - - if (--drbg_counter < 0) { - /* need to re-seed the drbg */ - u8 seed[32]; - - /* fetch seed from trng */ - cpacf_trng(NULL, 0, seed, sizeof(seed)); - /* seed drbg */ - memset(prng_ws, 0, sizeof(prng_ws)); - cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, - &prng_ws, NULL, 0, seed, sizeof(seed)); - /* re-init counter for drbg */ - drbg_counter = rndlong_drbg_reseed; - } - - /* fill the arch_get_random_long buffer from drbg */ - cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &prng_ws, - (u8 *) rndlong_buf, sizeof(rndlong_buf), - NULL, 0); -} - -bool s390_arch_get_random_long(unsigned long *v) -{ - bool rc = false; - unsigned long flags; - - /* arch_get_random_long() disabled ? */ - if (rndlong_drbg_reseed < 0) - return false; - - /* try to lock the random long lock */ - if (!spin_trylock_irqsave(&rndlong_lock, flags)) - return false; - - if (--rndlong_buf_index >= 0) { - /* deliver next long value from the buffer */ - *v = rndlong_buf[rndlong_buf_index]; - rc = true; - goto out; - } - - /* buffer is depleted and needs refill */ - if (in_interrupt()) { - /* delay refill in interrupt context to next caller */ - rndlong_buf_index = 0; - goto out; - } - - /* refill random long buffer */ - refill_rndlong_buf(); - rndlong_buf_index = ARRAY_SIZE(rndlong_buf); - - /* and provide one random long */ - *v = rndlong_buf[--rndlong_buf_index]; - rc = true; - -out: - spin_unlock_irqrestore(&rndlong_lock, flags); - return rc; -} -EXPORT_SYMBOL(s390_arch_get_random_long); - -static int __init s390_arch_random_init(void) -{ - /* all the needed PRNO subfunctions available ? */ - if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG) && - cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) { - - /* alloc arch random working buffer */ - arch_rng_buf = kmalloc(ARCH_RNG_BUF_SIZE, GFP_KERNEL); - if (!arch_rng_buf) - return -ENOMEM; - - /* kick worker queue job to fill the random buffer */ - queue_delayed_work(system_long_wq, - &arch_rng_work, ARCH_REFILL_TICKS); - - /* enable arch random to the outside world */ - static_branch_enable(&s390_arch_random_available); - } - - return 0; -} -arch_initcall(s390_arch_random_init); diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index 5dc712fde3c7f0..2c6e1c6ecbe780 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -15,17 +15,13 @@ #include #include +#include DECLARE_STATIC_KEY_FALSE(s390_arch_random_available); extern atomic64_t s390_arch_random_counter; -bool s390_arch_get_random_long(unsigned long *v); -bool s390_arch_random_generate(u8 *buf, unsigned int nbytes); - static inline bool __must_check arch_get_random_long(unsigned long *v) { - if (static_branch_likely(&s390_arch_random_available)) - return s390_arch_get_random_long(v); return false; } @@ -37,7 +33,9 @@ static inline bool __must_check arch_get_random_int(unsigned int *v) static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { if (static_branch_likely(&s390_arch_random_available)) { - return s390_arch_random_generate((u8 *)v, sizeof(*v)); + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; } return false; } @@ -45,7 +43,9 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v) static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { if (static_branch_likely(&s390_arch_random_available)) { - return s390_arch_random_generate((u8 *)v, sizeof(*v)); + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; } return false; } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index ee67215a678a58..8ede12c4ba6b50 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -880,6 +880,11 @@ static void __init setup_randomness(void) if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); memblock_free((unsigned long) vmms, PAGE_SIZE); + +#ifdef CONFIG_ARCH_RANDOM + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + static_branch_enable(&s390_arch_random_available); +#endif } /* From 6437d3deee28b9e19c06dcf2b91b62db387eebe0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 30 Jun 2022 16:48:18 -0400 Subject: [PATCH 0181/1056] SUNRPC: Fix READ_PLUS crasher commit a23dd544debcda4ee4a549ec7de59e85c3c8345c upstream. Looks like there are still cases when "space_left - frag1bytes" can legitimately exceed PAGE_SIZE. Ensure that xdr->end always remains within the current encode buffer. Reported-by: Bruce Fields Reported-by: Zorro Lang Link: https://bugzilla.kernel.org/show_bug.cgi?id=216151 Fixes: 6c254bf3b637 ("SUNRPC: Fix the calculation of xdr->end in xdr_get_next_encode_buffer()") Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 85473264cccfb8..f0a0a4ad6d525f 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -979,7 +979,7 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, */ xdr->p = (void *)p + frag2bytes; space_left = xdr->buf->buflen - xdr->buf->len; - if (space_left - nbytes >= PAGE_SIZE) + if (space_left - frag1bytes >= PAGE_SIZE) xdr->end = (void *)p + PAGE_SIZE; else xdr->end = (void *)p + space_left - frag1bytes; From 659d39545260100628d8a30020d09fb6bf63b915 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Wed, 29 Jun 2022 08:26:40 +0800 Subject: [PATCH 0182/1056] net: rose: fix UAF bugs caused by timer handler commit 9cc02ede696272c5271a401e4f27c262359bc2f6 upstream. There are UAF bugs in rose_heartbeat_expiry(), rose_timer_expiry() and rose_idletimer_expiry(). The root cause is that del_timer() could not stop the timer handler that is running and the refcount of sock is not managed properly. One of the UAF bugs is shown below: (thread 1) | (thread 2) | rose_bind | rose_connect | rose_start_heartbeat rose_release | (wait a time) case ROSE_STATE_0 | rose_destroy_socket | rose_heartbeat_expiry rose_stop_heartbeat | sock_put(sk) | ... sock_put(sk) // FREE | | bh_lock_sock(sk) // USE The sock is deallocated by sock_put() in rose_release() and then used by bh_lock_sock() in rose_heartbeat_expiry(). Although rose_destroy_socket() calls rose_stop_heartbeat(), it could not stop the timer that is running. The KASAN report triggered by POC is shown below: BUG: KASAN: use-after-free in _raw_spin_lock+0x5a/0x110 Write of size 4 at addr ffff88800ae59098 by task swapper/3/0 ... Call Trace: dump_stack_lvl+0xbf/0xee print_address_description+0x7b/0x440 print_report+0x101/0x230 ? irq_work_single+0xbb/0x140 ? _raw_spin_lock+0x5a/0x110 kasan_report+0xed/0x120 ? _raw_spin_lock+0x5a/0x110 kasan_check_range+0x2bd/0x2e0 _raw_spin_lock+0x5a/0x110 rose_heartbeat_expiry+0x39/0x370 ? rose_start_heartbeat+0xb0/0xb0 call_timer_fn+0x2d/0x1c0 ? rose_start_heartbeat+0xb0/0xb0 expire_timers+0x1f3/0x320 __run_timers+0x3ff/0x4d0 run_timer_softirq+0x41/0x80 __do_softirq+0x233/0x544 irq_exit_rcu+0x41/0xa0 sysvec_apic_timer_interrupt+0x8c/0xb0 asm_sysvec_apic_timer_interrupt+0x1b/0x20 RIP: 0010:default_idle+0xb/0x10 RSP: 0018:ffffc9000012fea0 EFLAGS: 00000202 RAX: 000000000000bcae RBX: ffff888006660f00 RCX: 000000000000bcae RDX: 0000000000000001 RSI: ffffffff843a11c0 RDI: ffffffff843a1180 RBP: dffffc0000000000 R08: dffffc0000000000 R09: ffffed100da36d46 R10: dfffe9100da36d47 R11: ffffffff83cf0950 R12: 0000000000000000 R13: 1ffff11000ccc1e0 R14: ffffffff8542af28 R15: dffffc0000000000 ... Allocated by task 146: __kasan_kmalloc+0xc4/0xf0 sk_prot_alloc+0xdd/0x1a0 sk_alloc+0x2d/0x4e0 rose_create+0x7b/0x330 __sock_create+0x2dd/0x640 __sys_socket+0xc7/0x270 __x64_sys_socket+0x71/0x80 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Freed by task 152: kasan_set_track+0x4c/0x70 kasan_set_free_info+0x1f/0x40 ____kasan_slab_free+0x124/0x190 kfree+0xd3/0x270 __sk_destruct+0x314/0x460 rose_release+0x2fa/0x3b0 sock_close+0xcb/0x230 __fput+0x2d9/0x650 task_work_run+0xd6/0x160 exit_to_user_mode_loop+0xc7/0xd0 exit_to_user_mode_prepare+0x4e/0x80 syscall_exit_to_user_mode+0x20/0x40 do_syscall_64+0x4f/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 This patch adds refcount of sock when we use functions such as rose_start_heartbeat() and so on to start timer, and decreases the refcount of sock when timer is finished or deleted by functions such as rose_stop_heartbeat() and so on. As a result, the UAF bugs could be mitigated. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Duoming Zhou Tested-by: Duoming Zhou Link: https://lore.kernel.org/r/20220629002640.5693-1-duoming@zju.edu.cn Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/rose/rose_timer.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index b3138fc2e552ea..f06ddbed3fed63 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -31,89 +31,89 @@ static void rose_idletimer_expiry(struct timer_list *); void rose_start_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); sk->sk_timer.function = rose_heartbeat_expiry; sk->sk_timer.expires = jiffies + 5 * HZ; - add_timer(&sk->sk_timer); + sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires); } void rose_start_t1timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t1; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_t2timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t2; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_t3timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t3; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_hbtimer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->hb; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_idletimer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->idletimer); + sk_stop_timer(sk, &rose->idletimer); if (rose->idle > 0) { rose->idletimer.function = rose_idletimer_expiry; rose->idletimer.expires = jiffies + rose->idle; - add_timer(&rose->idletimer); + sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires); } } void rose_stop_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); } void rose_stop_timer(struct sock *sk) { - del_timer(&rose_sk(sk)->timer); + sk_stop_timer(sk, &rose_sk(sk)->timer); } void rose_stop_idletimer(struct sock *sk) { - del_timer(&rose_sk(sk)->idletimer); + sk_stop_timer(sk, &rose_sk(sk)->idletimer); } static void rose_heartbeat_expiry(struct timer_list *t) @@ -130,6 +130,7 @@ static void rose_heartbeat_expiry(struct timer_list *t) (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { bh_unlock_sock(sk); rose_destroy_socket(sk); + sock_put(sk); return; } break; @@ -152,6 +153,7 @@ static void rose_heartbeat_expiry(struct timer_list *t) rose_start_heartbeat(sk); bh_unlock_sock(sk); + sock_put(sk); } static void rose_timer_expiry(struct timer_list *t) @@ -181,6 +183,7 @@ static void rose_timer_expiry(struct timer_list *t) break; } bh_unlock_sock(sk); + sock_put(sk); } static void rose_idletimer_expiry(struct timer_list *t) @@ -205,4 +208,5 @@ static void rose_idletimer_expiry(struct timer_list *t) sock_set_flag(sk, SOCK_DEAD); } bh_unlock_sock(sk); + sock_put(sk); } From aa0806efb8f046b2354e30a641262d345527b6e3 Mon Sep 17 00:00:00 2001 From: Jose Alonso Date: Tue, 28 Jun 2022 12:13:02 -0300 Subject: [PATCH 0183/1056] net: usb: ax88179_178a: Fix packet receiving commit f8ebb3ac881b17712e1d5967c97ab1806b16d3d6 upstream. This patch corrects packet receiving in ax88179_rx_fixup. - problem observed: ifconfig shows allways a lot of 'RX Errors' while packets are received normally. This occurs because ax88179_rx_fixup does not recognise properly the usb urb received. The packets are normally processed and at the end, the code exits with 'return 0', generating RX Errors. (pkt_cnt==-2 and ptk_hdr over field rx_hdr trying to identify another packet there) This is a usb urb received by "tcpdump -i usbmon2 -X" on a little-endian CPU: 0x0000: eeee f8e3 3b19 87a0 94de 80e3 daac 0800 ^ packet 1 start (pkt_len = 0x05ec) ^^^^ IP alignment pseudo header ^ ethernet packet start last byte ethernet packet v padding (8-bytes aligned) vvvv vvvv 0x05e0: c92d d444 1420 8a69 83dd 272f e82b 9811 0x05f0: eeee f8e3 3b19 87a0 94de 80e3 daac 0800 ... ^ packet 2 0x0be0: eeee f8e3 3b19 87a0 94de 80e3 daac 0800 ... 0x1130: 9d41 9171 8a38 0ec5 eeee f8e3 3b19 87a0 ... 0x1720: 8cfc 15ff 5e4c e85c eeee f8e3 3b19 87a0 ... 0x1d10: ecfa 2a3a 19ab c78c eeee f8e3 3b19 87a0 ... 0x2070: eeee f8e3 3b19 87a0 94de 80e3 daac 0800 ... ^ packet 7 0x2120: 7c88 4ca5 5c57 7dcc 0d34 7577 f778 7e0a 0x2130: f032 e093 7489 0740 3008 ec05 0000 0080 ====1==== ====2==== hdr_off ^ pkt_len = 0x05ec ^^^^ AX_RXHDR_*=0x00830 ^^^^ ^ pkt_len = 0 ^^^^ AX_RXHDR_DROP_ERR=0x80000000 ^^^^ ^ 0x2140: 3008 ec05 0000 0080 3008 5805 0000 0080 0x2150: 3008 ec05 0000 0080 3008 ec05 0000 0080 0x2160: 3008 5803 0000 0080 3008 c800 0000 0080 ===11==== ===12==== ===13==== ===14==== 0x2170: 0000 0000 0e00 3821 ^^^^ ^^^^ rx_hdr ^^^^ pkt_cnt=14 ^^^^ hdr_off=0x2138 ^^^^ ^^^^ padding The dump shows that pkt_cnt is the number of entrys in the per-packet metadata. It is "2 * packet count". Each packet have two entrys. The first have a valid value (pkt_len and AX_RXHDR_*) and the second have a dummy-header 0x80000000 (pkt_len=0 with AX_RXHDR_DROP_ERR). Why exists dummy-header for each packet?!? My guess is that this was done probably to align the entry for each packet to 64-bits and maintain compatibility with old firmware. There is also a padding (0x00000000) before the rx_hdr to align the end of rx_hdr to 64-bit. Note that packets have a alignment of 64-bits (8-bytes). This patch assumes that the dummy-header and the last padding are optional. So it preserves semantics and recognises the same valid packets as the current code. This patch was made using only the dumpfile information and tested with only one device: 0b95:1790 ASIX Electronics Corp. AX88179 Gigabit Ethernet Fixes: 57bc3d3ae8c1 ("net: usb: ax88179_178a: Fix out-of-bounds accesses in RX fixup") Fixes: e2ca90c276e1 ("ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver") Signed-off-by: Jose Alonso Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/d6970bb04bf67598af4d316eaeb1792040b18cfd.camel@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/ax88179_178a.c | 101 +++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 25 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index d5ce642200e8ed..0a2c3860179e7a 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1471,6 +1471,42 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) * are bundled into this buffer and where we can find an array of * per-packet metadata (which contains elements encoded into u16). */ + + /* SKB contents for current firmware: + * + * ... + * + * + * ... + * + * + * + * where: + * contains pkt_len bytes: + * 2 bytes of IP alignment pseudo header + * packet received + * contains 4 bytes: + * pkt_len and fields AX_RXHDR_* + * 0-7 bytes to terminate at + * 8 bytes boundary (64-bit). + * 4 bytes to make rx_hdr terminate at + * 8 bytes boundary (64-bit) + * contains 4 bytes: + * pkt_len=0 and AX_RXHDR_DROP_ERR + * contains 4 bytes: + * pkt_cnt and hdr_off (offset of + * ) + * + * pkt_cnt is number of entrys in the per-packet metadata. + * In current firmware there is 2 entrys per packet. + * The first points to the packet and the + * second is a dummy header. + * This was done probably to align fields in 64-bit and + * maintain compatibility with old firmware. + * This code assumes that and are + * optional. + */ + if (skb->len < 4) return 0; skb_trim(skb, skb->len - 4); @@ -1484,51 +1520,66 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* Make sure that the bounds of the metadata array are inside the SKB * (and in front of the counter at the end). */ - if (pkt_cnt * 2 + hdr_off > skb->len) + if (pkt_cnt * 4 + hdr_off > skb->len) return 0; pkt_hdr = (u32 *)(skb->data + hdr_off); /* Packets must not overlap the metadata array */ skb_trim(skb, hdr_off); - for (; ; pkt_cnt--, pkt_hdr++) { + for (; pkt_cnt > 0; pkt_cnt--, pkt_hdr++) { + u16 pkt_len_plus_padd; u16 pkt_len; le32_to_cpus(pkt_hdr); pkt_len = (*pkt_hdr >> 16) & 0x1fff; + pkt_len_plus_padd = (pkt_len + 7) & 0xfff8; - if (pkt_len > skb->len) + /* Skip dummy header used for alignment + */ + if (pkt_len == 0) + continue; + + if (pkt_len_plus_padd > skb->len) return 0; /* Check CRC or runt packet */ - if (((*pkt_hdr & (AX_RXHDR_CRC_ERR | AX_RXHDR_DROP_ERR)) == 0) && - pkt_len >= 2 + ETH_HLEN) { - bool last = (pkt_cnt == 0); - - if (last) { - ax_skb = skb; - } else { - ax_skb = skb_clone(skb, GFP_ATOMIC); - if (!ax_skb) - return 0; - } - ax_skb->len = pkt_len; - /* Skip IP alignment pseudo header */ - skb_pull(ax_skb, 2); - skb_set_tail_pointer(ax_skb, ax_skb->len); - ax_skb->truesize = pkt_len + sizeof(struct sk_buff); - ax88179_rx_checksum(ax_skb, pkt_hdr); + if ((*pkt_hdr & (AX_RXHDR_CRC_ERR | AX_RXHDR_DROP_ERR)) || + pkt_len < 2 + ETH_HLEN) { + dev->net->stats.rx_errors++; + skb_pull(skb, pkt_len_plus_padd); + continue; + } - if (last) - return 1; + /* last packet */ + if (pkt_len_plus_padd == skb->len) { + skb_trim(skb, pkt_len); - usbnet_skb_return(dev, ax_skb); + /* Skip IP alignment pseudo header */ + skb_pull(skb, 2); + + skb->truesize = SKB_TRUESIZE(pkt_len_plus_padd); + ax88179_rx_checksum(skb, pkt_hdr); + return 1; } - /* Trim this packet away from the SKB */ - if (!skb_pull(skb, (pkt_len + 7) & 0xFFF8)) + ax_skb = skb_clone(skb, GFP_ATOMIC); + if (!ax_skb) return 0; + skb_trim(ax_skb, pkt_len); + + /* Skip IP alignment pseudo header */ + skb_pull(ax_skb, 2); + + skb->truesize = pkt_len_plus_padd + + SKB_DATA_ALIGN(sizeof(struct sk_buff)); + ax88179_rx_checksum(ax_skb, pkt_hdr); + usbnet_skb_return(dev, ax_skb); + + skb_pull(skb, pkt_len_plus_padd); } + + return 0; } static struct sk_buff * From 4db9730360ab4ecb6686dd4dafce7d60e485dd67 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 17 Jun 2022 15:29:49 +0800 Subject: [PATCH 0184/1056] virtio-net: fix race between ndo_open() and virtio_device_ready() commit 50c0ada627f56c92f5953a8bf9158b045ad026a1 upstream. We currently call virtio_device_ready() after netdev registration. Since ndo_open() can be called immediately after register_netdev, this means there exists a race between ndo_open() and virtio_device_ready(): the driver may start to use the device before DRIVER_OK which violates the spec. Fix this by switching to use register_netdevice() and protect the virtio_device_ready() with rtnl_lock() to make sure ndo_open() can only be called after virtio_device_ready(). Fixes: 4baf1e33d0842 ("virtio_net: enable VQs early") Signed-off-by: Jason Wang Message-Id: <20220617072949.30734-1-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 468d0ffc266b48..318c681ad63ee2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3241,14 +3241,20 @@ static int virtnet_probe(struct virtio_device *vdev) } } - err = register_netdev(dev); + /* serialize netdev register + virtio_device_ready() with ndo_open() */ + rtnl_lock(); + + err = register_netdevice(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); + rtnl_unlock(); goto free_failover; } virtio_device_ready(vdev); + rtnl_unlock(); + err = virtnet_cpu_notif_add(vi); if (err) { pr_debug("virtio_net: registering cpu notifier failed\n"); From cc38c1eaa2e04aa81aaa3a32b5235734a8ac01a6 Mon Sep 17 00:00:00 2001 From: Dimitris Michailidis Date: Wed, 22 Jun 2022 17:02:34 -0700 Subject: [PATCH 0185/1056] selftests/net: pass ipv6_args to udpgso_bench's IPv6 TCP test commit b968080808f7f28b89aa495b7402ba48eb17ee93 upstream. udpgso_bench.sh has been running its IPv6 TCP test with IPv4 arguments since its initial conmit. Looks like a typo. Fixes: 3a687bef148d ("selftests: udp gso benchmark") Cc: willemb@google.com Signed-off-by: Dimitris Michailidis Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20220623000234.61774-1-dmichail@fungible.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/udpgso_bench.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 80b5d352702e57..dc932fd6536345 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -120,7 +120,7 @@ run_all() { run_udp "${ipv4_args}" echo "ipv6" - run_tcp "${ipv4_args}" + run_tcp "${ipv6_args}" run_udp "${ipv6_args}" } From 1e2327ba0fe928e646e4c69d1fa08e506f7fc62d Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 22 Jun 2022 20:02:04 -0700 Subject: [PATCH 0186/1056] net: dsa: bcm_sf2: force pause link settings commit 7c97bc0128b2eecc703106112679a69d446d1a12 upstream. The pause settings reported by the PHY should also be applied to the GMII port status override otherwise the switch will not generate pause frames towards the link partner despite the advertisement saying otherwise. Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver") Signed-off-by: Doug Berger Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20220623030204.1966851-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index b3a43a3d90e499..d76b2377d66ef1 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -865,6 +865,11 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port, if (duplex == DUPLEX_FULL) reg |= DUPLX_MODE; + if (tx_pause) + reg |= TXFLOW_CNTL; + if (rx_pause) + reg |= RXFLOW_CNTL; + core_writel(priv, reg, offset); } From 8145f77d38de4f88b8a69e1463f5c09ba189d77c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 22 Jun 2022 21:20:39 -0700 Subject: [PATCH 0187/1056] net: tun: unlink NAPI from device on destruction commit 3b9bc84d311104906d2b4995a9a02d7b7ddab2db upstream. Syzbot found a race between tun file and device destruction. NAPIs live in struct tun_file which can get destroyed before the netdev so we have to del them explicitly. The current code is missing deleting the NAPI if the queue was detached first. Fixes: 943170998b20 ("tun: enable NAPI for TUN/TAP driver") Reported-by: syzbot+b75c138e9286ac742647@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20220623042039.2274708-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 02de8d998bfa42..9cc0e68829005d 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -728,6 +728,7 @@ static void tun_detach_all(struct net_device *dev) sock_put(&tfile->sk); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { + tun_napi_del(tfile); tun_enable_queue(tfile); tun_queue_purge(tfile); xdp_rxq_info_unreg(&tfile->xdp_rxq); From 92e4f3ee5831e025a5bcd81782b1d41b27a90c52 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 22 Jun 2022 21:21:05 -0700 Subject: [PATCH 0188/1056] net: tun: stop NAPI when detaching queues commit a8fc8cb5692aebb9c6f7afd4265366d25dcd1d01 upstream. While looking at a syzbot report I noticed the NAPI only gets disabled before it's deleted. I think that user can detach the queue before destroying the device and the NAPI will never be stopped. Fixes: 943170998b20 ("tun: enable NAPI for TUN/TAP driver") Acked-by: Petar Penkov Link: https://lore.kernel.org/r/20220623042105.2274812-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9cc0e68829005d..cdeee7f7074938 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -274,6 +274,12 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, } } +static void tun_napi_enable(struct tun_file *tfile) +{ + if (tfile->napi_enabled) + napi_enable(&tfile->napi); +} + static void tun_napi_disable(struct tun_file *tfile) { if (tfile->napi_enabled) @@ -654,8 +660,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean) if (clean) { RCU_INIT_POINTER(tfile->tun, NULL); sock_put(&tfile->sk); - } else + } else { tun_disable_queue(tun, tfile); + tun_napi_disable(tfile); + } synchronize_net(); tun_flow_delete_by_queue(tun, tun->numqueues + 1); @@ -809,6 +817,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, if (tfile->detached) { tun_enable_queue(tfile); + tun_napi_enable(tfile); } else { sock_hold(&tfile->sk); tun_napi_init(tun, tfile, napi, napi_frags); From 7191cecb99b28ad749082c2bdad729c532a60533 Mon Sep 17 00:00:00 2001 From: Enguerrand de Ribaucourt Date: Thu, 23 Jun 2022 15:46:44 +0200 Subject: [PATCH 0189/1056] net: dp83822: disable false carrier interrupt commit c96614eeab663646f57f67aa591e015abd8bd0ba upstream. When unplugging an Ethernet cable, false carrier events were produced by the PHY at a very high rate. Once the false carrier counter full, an interrupt was triggered every few clock cycles until the cable was replugged. This resulted in approximately 10k/s interrupts. Since the false carrier counter (FCSCR) is never used, we can safely disable this interrupt. In addition to improving performance, this also solved MDIO read timeouts I was randomly encountering with an i.MX8 fec MAC because of the interrupt flood. The interrupt count and MDIO timeout fix were tested on a v5.4.110 kernel. Fixes: 87461f7a58ab ("net: phy: DP83822 initial driver submission") Signed-off-by: Enguerrand de Ribaucourt Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/dp83822.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index ce17b2af3218f0..ab13699c20bce9 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -229,7 +229,6 @@ static int dp83822_config_intr(struct phy_device *phydev) return misr_status; misr_status |= (DP83822_RX_ERR_HF_INT_EN | - DP83822_FALSE_CARRIER_HF_INT_EN | DP83822_LINK_STAT_INT_EN | DP83822_ENERGY_DET_INT_EN | DP83822_LINK_QUAL_INT_EN); From c16404122a7c064b7e4e1b4132c25cfa86f13684 Mon Sep 17 00:00:00 2001 From: Enguerrand de Ribaucourt Date: Thu, 23 Jun 2022 15:46:45 +0200 Subject: [PATCH 0190/1056] net: dp83822: disable rx error interrupt commit 0e597e2affb90d6ea48df6890d882924acf71e19 upstream. Some RX errors, notably when disconnecting the cable, increase the RCSR register. Once half full (0x7fff), an interrupt flood is generated. I measured ~3k/s interrupts even after the RX errors transfer was stopped. Since we don't read and clear the RCSR register, we should disable this interrupt. Fixes: 87461f7a58ab ("net: phy: DP83822 initial driver submission") Signed-off-by: Enguerrand de Ribaucourt Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/dp83822.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index ab13699c20bce9..a792dd6d2ec33e 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -228,8 +228,7 @@ static int dp83822_config_intr(struct phy_device *phydev) if (misr_status < 0) return misr_status; - misr_status |= (DP83822_RX_ERR_HF_INT_EN | - DP83822_LINK_STAT_INT_EN | + misr_status |= (DP83822_LINK_STAT_INT_EN | DP83822_ENERGY_DET_INT_EN | DP83822_LINK_QUAL_INT_EN); From 732e73bd81c7fa557b4d00788edc879dcad7c02c Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 25 May 2022 16:20:29 +0300 Subject: [PATCH 0191/1056] RDMA/qedr: Fix reporting QP timeout attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 118f767413ada4eef7825fbd4af7c0866f883441 upstream. Make sure to save the passed QP timeout attribute when the QP gets modified, so when calling query QP the right value is reported and not the converted value that is required by the firmware. This issue was found while running the pyverbs tests. Fixes: cecbcddf6461 ("qedr: Add support for QP verbs") Link: https://lore.kernel.org/r/20220525132029.84813-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Acked-by: Michal Kalderon  Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/qedr/qedr.h | 1 + drivers/infiniband/hw/qedr/verbs.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 8def88cfa30091..db9ef3e1eb97c6 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -418,6 +418,7 @@ struct qedr_qp { u32 sq_psn; u32 qkey; u32 dest_qp_num; + u8 timeout; /* Relevant to qps created from kernel space only (ULPs) */ u8 prev_wqe_size; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index f652d083ff20fd..49dfedbc566514 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2622,6 +2622,8 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 1 << max_t(int, attr->timeout - 8, 0); else qp_params.ack_timeout = 0; + + qp->timeout = attr->timeout; } if (attr_mask & IB_QP_RETRY_CNT) { @@ -2781,7 +2783,7 @@ int qedr_query_qp(struct ib_qp *ibqp, rdma_ah_set_dgid_raw(&qp_attr->ah_attr, ¶ms.dgid.bytes[0]); rdma_ah_set_port_num(&qp_attr->ah_attr, 1); rdma_ah_set_sl(&qp_attr->ah_attr, 0); - qp_attr->timeout = params.timeout; + qp_attr->timeout = qp->timeout; qp_attr->rnr_retry = params.rnr_retry; qp_attr->retry_cnt = params.retry_cnt; qp_attr->min_rnr_timer = params.min_rnr_nak_timer; From 889000874c1204e47c7f2a4945db262a47e7efc9 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 21 Jun 2022 09:25:44 +0400 Subject: [PATCH 0192/1056] RDMA/cm: Fix memory leak in ib_cm_insert_listen commit 2990f223ffa7bb25422956b9f79f9176a5b38346 upstream. cm_alloc_id_priv() allocates resource for the cm_id_priv. When cm_init_listen() fails it doesn't free it, leading to memory leak. Add the missing error unwind. Fixes: 98f67156a80f ("RDMA/cm: Simplify establishing a listen cm_id") Link: https://lore.kernel.org/r/20220621052546.4821-1-linmq006@gmail.com Signed-off-by: Miaoqian Lin Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1c107d6d03b990..b985e0d9bc05e9 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1252,8 +1252,10 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, return ERR_CAST(cm_id_priv); err = cm_init_listen(cm_id_priv, service_id, 0); - if (err) + if (err) { + ib_destroy_cm_id(&cm_id_priv->id); return ERR_PTR(err); + } spin_lock_irq(&cm_id_priv->lock); listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler); From 0b6e0eb5c45e79e9095de2498cc0ca5ec563fc5e Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Mon, 27 Jun 2022 22:00:04 +0800 Subject: [PATCH 0193/1056] linux/dim: Fix divide by 0 in RDMA DIM commit 0fe3dbbefb74a8575f61d7801b08dbc50523d60d upstream. Fix a divide 0 error in rdma_dim_stats_compare() when prev->cpe_ratio == 0. CallTrace: Hardware name: H3C R4900 G3/RS33M2C9S, BIOS 2.00.37P21 03/12/2020 task: ffff880194b78000 task.stack: ffffc90006714000 RIP: 0010:backport_rdma_dim+0x10e/0x240 [mlx_compat] RSP: 0018:ffff880c10e83ec0 EFLAGS: 00010202 RAX: 0000000000002710 RBX: ffff88096cd7f780 RCX: 0000000000000064 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000001 RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 000000001d7c6c09 R13: ffff88096cd7f780 R14: ffff880b174fe800 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff880c10e80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000a0965b00 CR3: 000000000200a003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ib_poll_handler+0x43/0x80 [ib_core] irq_poll_softirq+0xae/0x110 __do_softirq+0xd1/0x28c irq_exit+0xde/0xf0 do_IRQ+0x54/0xe0 common_interrupt+0x8f/0x8f ? cpuidle_enter_state+0xd9/0x2a0 ? cpuidle_enter_state+0xc7/0x2a0 ? do_idle+0x170/0x1d0 ? cpu_startup_entry+0x6f/0x80 ? start_secondary+0x1b9/0x210 ? secondary_startup_64+0xa5/0xb0 Code: 0f 87 e1 00 00 00 8b 4c 24 14 44 8b 43 14 89 c8 4d 63 c8 44 29 c0 99 31 d0 29 d0 31 d2 48 98 48 8d 04 80 48 8d 04 80 48 c1 e0 02 <49> f7 f1 48 83 f8 0a 0f 86 c1 00 00 00 44 39 c1 7f 10 48 89 df RIP: backport_rdma_dim+0x10e/0x240 [mlx_compat] RSP: ffff880c10e83ec0 Fixes: f4915455dcf0 ("linux/dim: Implement RDMA adaptive moderation (DIM)") Link: https://lore.kernel.org/r/20220627140004.3099-1-thomas.liu@ucloud.cn Signed-off-by: Tao Liu Reviewed-by: Max Gurtovoy Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- include/linux/dim.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/dim.h b/include/linux/dim.h index b698266d00356e..6c5733981563ea 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -21,7 +21,7 @@ * We consider 10% difference as significant. */ #define IS_SIGNIFICANT_DIFF(val, ref) \ - (((100UL * abs((val) - (ref))) / (ref)) > 10) + ((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10)) /* * Calculate the gap between two values. From f7eaa228b0fac2ef3a875f6e436fd210a5a831d6 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 Jun 2022 09:51:39 +0200 Subject: [PATCH 0194/1056] net: usb: asix: do not force pause frames support commit ce95ab775f8d8e89a038c0e5611a7381a2ef8e43 upstream. We should respect link partner capabilities and not force flow control support on every link. Even more, in current state the MAC driver do not advertises pause support so we should not keep flow control enabled at all. Fixes: e532a096be0e ("net: usb: asix: ax88772: add phylib support") Reported-by: Anton Lundin Signed-off-by: Oleksij Rempel Tested-by: Anton Lundin Link: https://lore.kernel.org/r/20220624075139.3139300-2-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/asix.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h index 4334aafab59a4b..c126df1c13ee71 100644 --- a/drivers/net/usb/asix.h +++ b/drivers/net/usb/asix.h @@ -126,8 +126,7 @@ AX_MEDIUM_RE) #define AX88772_MEDIUM_DEFAULT \ - (AX_MEDIUM_FD | AX_MEDIUM_RFC | \ - AX_MEDIUM_TFC | AX_MEDIUM_PS | \ + (AX_MEDIUM_FD | AX_MEDIUM_PS | \ AX_MEDIUM_AC | AX_MEDIUM_RE) /* AX88772 & AX88178 RX_CTL values */ From 536d2a6a4fd691701de452b0a39ec3be65bf777a Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 28 Jun 2022 11:35:17 +0200 Subject: [PATCH 0195/1056] usbnet: fix memory allocation in helpers commit e65af5403e462ccd7dff6a045a886c64da598c2e upstream. usbnet provides some helper functions that are also used in the context of reset() operations. During a reset the other drivers on a device are unable to operate. As that can be block drivers, a driver for another interface cannot use paging in its memory allocations without risking a deadlock. Use GFP_NOIO in the helpers. Fixes: 877bd862f32b8 ("usbnet: introduce usbnet 3 command helpers") Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20220628093517.7469-1-oneukum@suse.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/usbnet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index af2bbaff247822..a0ea236ac60e02 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -2002,7 +2002,7 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, cmd, reqtype, value, index, size); if (size) { - buf = kmalloc(size, GFP_KERNEL); + buf = kmalloc(size, GFP_NOIO); if (!buf) goto out; } @@ -2034,7 +2034,7 @@ static int __usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, cmd, reqtype, value, index, size); if (data) { - buf = kmemdup(data, size, GFP_KERNEL); + buf = kmemdup(data, size, GFP_NOIO); if (!buf) goto out; } else { From 0b842b9e84cd176be314bce1a3faa50100b3094e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 27 Jun 2022 18:02:41 -0700 Subject: [PATCH 0196/1056] selftests: mptcp: more stable diag tests commit 42fb6cddec3b306c9f6ef136b6438e0de1836431 upstream. The mentioned test-case still use an hard-coded-len sleep to wait for a relative large number of connection to be established. On very slow VM and with debug build such timeout could be exceeded, causing failures in our CI. Address the issue polling for the expected condition several times, up to an unreasonable high amount of time. On reasonably fast system the self-tests will be faster then before, on very slow one we will still catch the correct condition. Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/diag.sh | 48 +++++++++++++++++++---- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index ff821025d30963..49dfabded1d443 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -61,6 +61,39 @@ chk_msk_nr() __chk_nr "grep -c token:" $* } +wait_msk_nr() +{ + local condition="grep -c token:" + local expected=$1 + local timeout=20 + local msg nr + local max=0 + local i=0 + + shift 1 + msg=$* + + while [ $i -lt $timeout ]; do + nr=$(ss -inmHMN $ns | $condition) + [ $nr == $expected ] && break; + [ $nr -gt $max ] && max=$nr + i=$((i + 1)) + sleep 1 + done + + printf "%-50s" "$msg" + if [ $i -ge $timeout ]; then + echo "[ fail ] timeout while expecting $expected max $max last $nr" + ret=$test_cnt + elif [ $nr != $expected ]; then + echo "[ fail ] expected $expected found $nr" + ret=$test_cnt + else + echo "[ ok ]" + fi + test_cnt=$((test_cnt+1)) +} + chk_msk_fallback_nr() { __chk_nr "grep -c fallback" $* @@ -109,7 +142,7 @@ ip -n $ns link set dev lo up echo "a" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10000 -l -t ${timeout_poll} \ + ./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & wait_local_port_listen $ns 10000 chk_msk_nr 0 "no msk on netns creation" @@ -117,7 +150,7 @@ chk_msk_nr 0 "no msk on netns creation" echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} \ + ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \ 127.0.0.1 >/dev/null & wait_connected $ns 10000 chk_msk_nr 2 "after MPC handshake " @@ -129,13 +162,13 @@ flush_pids echo "a" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \ + ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & wait_local_port_listen $ns 10001 echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} \ + ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} -w 20 \ 127.0.0.1 >/dev/null & wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" @@ -146,7 +179,7 @@ for I in `seq 1 $NR_CLIENTS`; do echo "a" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p $((I+10001)) -l -w 10 \ + ./mptcp_connect -p $((I+10001)) -l -w 20 \ -t ${timeout_poll} 0.0.0.0 >/dev/null & done wait_local_port_listen $ns $((NR_CLIENTS + 10001)) @@ -155,12 +188,11 @@ for I in `seq 1 $NR_CLIENTS`; do echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p $((I+10001)) -w 10 \ + ./mptcp_connect -p $((I+10001)) -w 20 \ -t ${timeout_poll} 127.0.0.1 >/dev/null & done -sleep 1.5 -chk_msk_nr $((NR_CLIENTS*2)) "many msk socket present" +wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" flush_pids exit $ret From c9f8f94d3ca916e2eecc0ba8227466a871432628 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 28 Jun 2022 11:31:34 +0800 Subject: [PATCH 0197/1056] net: ipv6: unexport __init-annotated seg6_hmac_net_init() commit 53ad46169fe2996fe1b623ba6c9c4fa33847876f upstream. As of commit 5801f064e351 ("net: ipv6: unexport __init-annotated seg6_hmac_init()"), EXPORT_SYMBOL and __init is a bad combination because the .init.text section is freed up after the initialization. Hence, modules cannot use symbols annotated __init. The access to a freed symbol may end up with kernel panic. This remove the EXPORT_SYMBOL to fix modpost warning: WARNING: modpost: vmlinux.o(___ksymtab+seg6_hmac_net_init+0x0): Section mismatch in reference from the variable __ksymtab_seg6_hmac_net_init to the function .init.text:seg6_hmac_net_init() The symbol seg6_hmac_net_init is exported and annotated __init Fix this by removing the __init annotation of seg6_hmac_net_init or drop the export. Fixes: bf355b8d2c30 ("ipv6: sr: add core files for SR HMAC support") Reported-by: Hulk Robot Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20220628033134.21088-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv6/seg6_hmac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 5b2c9ce533951e..b7d6b64cc5320e 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -408,7 +408,6 @@ int __net_init seg6_hmac_net_init(struct net *net) return 0; } -EXPORT_SYMBOL(seg6_hmac_net_init); void seg6_hmac_exit(void) { From 115d941916913562831570e1fdb1da55c2db4ed5 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 25 Jun 2022 23:52:43 +0300 Subject: [PATCH 0198/1056] NFSD: restore EINVAL error translation in nfsd_commit() commit 8a9ffb8c857c2c99403bd6483a5a005fed5c0773 upstream. commit 555dbf1a9aac ("nfsd: Replace use of rwsem with errseq_t") incidentally broke translation of -EINVAL to nfserr_notsupp. The patch restores that. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Alexey Khoroshilov Fixes: 555dbf1a9aac ("nfsd: Replace use of rwsem with errseq_t") Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/vfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7bfb685831546e..4949f4a8cd4740 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1142,6 +1142,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_net_id)); err2 = filemap_check_wb_err(nf->nf_file->f_mapping, since); + err = nfserrno(err2); break; case -EINVAL: err = nfserr_notsupp; @@ -1149,8 +1150,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, default: nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); + err = nfserrno(err2); } - err = nfserrno(err2); } else nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, nfsd_net_id)); From 4d5055873e24bba9e01b3b5d16790714b0d38533 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 30 Jun 2022 22:58:49 +0300 Subject: [PATCH 0199/1056] vfs: fix copy_file_range() regression in cross-fs copies commit 868f9f2f8e004bfe0d3935b1976f625b2924893b upstream. A regression has been reported by Nicolas Boichat, found while using the copy_file_range syscall to copy a tracefs file. Before commit 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices") the kernel would return -EXDEV to userspace when trying to copy a file across different filesystems. After this commit, the syscall doesn't fail anymore and instead returns zero (zero bytes copied), as this file's content is generated on-the-fly and thus reports a size of zero. Another regression has been reported by He Zhe - the assertion of WARN_ON_ONCE(ret == -EOPNOTSUPP) can be triggered from userspace when copying from a sysfs file whose read operation may return -EOPNOTSUPP. Since we do not have test coverage for copy_file_range() between any two types of filesystems, the best way to avoid these sort of issues in the future is for the kernel to be more picky about filesystems that are allowed to do copy_file_range(). This patch restores some cross-filesystem copy restrictions that existed prior to commit 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices"), namely, cross-sb copy is not allowed for filesystems that do not implement ->copy_file_range(). Filesystems that do implement ->copy_file_range() have full control of the result - if this method returns an error, the error is returned to the user. Before this change this was only true for fs that did not implement the ->remap_file_range() operation (i.e. nfsv3). Filesystems that do not implement ->copy_file_range() still fall-back to the generic_copy_file_range() implementation when the copy is within the same sb. This helps the kernel can maintain a more consistent story about which filesystems support copy_file_range(). nfsd and ksmbd servers are modified to fall-back to the generic_copy_file_range() implementation in case vfs_copy_file_range() fails with -EOPNOTSUPP or -EXDEV, which preserves behavior of server-side-copy. fall-back to generic_copy_file_range() is not implemented for the smb operation FSCTL_DUPLICATE_EXTENTS_TO_FILE, which is arguably a correct change of behavior. Fixes: 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices") Link: https://lore.kernel.org/linux-fsdevel/20210212044405.4120619-1-drinkcat@chromium.org/ Link: https://lore.kernel.org/linux-fsdevel/CANMq1KDZuxir2LM5jOTm0xx+BnvW=ZmpsG47CyHFJwnw7zSX6Q@mail.gmail.com/ Link: https://lore.kernel.org/linux-fsdevel/20210126135012.1.If45b7cdc3ff707bc1efa17f5366057d60603c45f@changeid/ Link: https://lore.kernel.org/linux-fsdevel/20210630161320.29006-1-lhenriques@suse.de/ Reported-by: Nicolas Boichat Reported-by: kernel test robot Signed-off-by: Luis Henriques Fixes: 64bf5ff58dff ("vfs: no fallback for ->copy_file_range") Link: https://lore.kernel.org/linux-fsdevel/20f17f64-88cb-4e80-07c1-85cb96c83619@windriver.com/ Reported-by: He Zhe Tested-by: Namjae Jeon Tested-by: Luis Henriques Signed-off-by: Amir Goldstein Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ksmbd/smb2pdu.c | 16 ++++++++-- fs/ksmbd/vfs.c | 4 +++ fs/nfsd/vfs.c | 8 ++++- fs/read_write.c | 77 ++++++++++++++++++++++++++-------------------- 4 files changed, 68 insertions(+), 37 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 5fc161b8c3859d..876afde0ea6609 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -7794,14 +7794,24 @@ int smb2_ioctl(struct ksmbd_work *work) src_off = le64_to_cpu(dup_ext->SourceFileOffset); dst_off = le64_to_cpu(dup_ext->TargetFileOffset); length = le64_to_cpu(dup_ext->ByteCount); - cloned = vfs_clone_file_range(fp_in->filp, src_off, fp_out->filp, - dst_off, length, 0); + /* + * XXX: It is not clear if FSCTL_DUPLICATE_EXTENTS_TO_FILE + * should fall back to vfs_copy_file_range(). This could be + * beneficial when re-exporting nfs/smb mount, but note that + * this can result in partial copy that returns an error status. + * If/when FSCTL_DUPLICATE_EXTENTS_TO_FILE_EX is implemented, + * fall back to vfs_copy_file_range(), should be avoided when + * the flag DUPLICATE_EXTENTS_DATA_EX_SOURCE_ATOMIC is set. + */ + cloned = vfs_clone_file_range(fp_in->filp, src_off, + fp_out->filp, dst_off, length, 0); if (cloned == -EXDEV || cloned == -EOPNOTSUPP) { ret = -EOPNOTSUPP; goto dup_ext_out; } else if (cloned != length) { cloned = vfs_copy_file_range(fp_in->filp, src_off, - fp_out->filp, dst_off, length, 0); + fp_out->filp, dst_off, + length, 0); if (cloned != length) { if (cloned < 0) ret = cloned; diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index ce852a12dea682..2139aa042c790b 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1782,6 +1782,10 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work, ret = vfs_copy_file_range(src_fp->filp, src_off, dst_fp->filp, dst_off, len, 0); + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(src_fp->filp, src_off, + dst_fp->filp, dst_off, + len, 0); if (ret < 0) return ret; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 4949f4a8cd4740..5f62fa0963ced7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -560,6 +560,7 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { + ssize_t ret; /* * Limit copy to 4MB to prevent indefinitely blocking an nfsd @@ -570,7 +571,12 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, * limit like this and pipeline multiple COPY requests. */ count = min_t(u64, count, 1 << 22); - return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); + ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); + + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(src, src_pos, dst, dst_pos, + count, 0); + return ret; } __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, diff --git a/fs/read_write.c b/fs/read_write.c index af057c57bdc644..c6db1a0762fab8 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1384,28 +1384,6 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, } EXPORT_SYMBOL(generic_copy_file_range); -static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t len, unsigned int flags) -{ - /* - * Although we now allow filesystems to handle cross sb copy, passing - * a file of the wrong filesystem type to filesystem driver can result - * in an attempt to dereference the wrong type of ->private_data, so - * avoid doing that until we really have a good reason. NFS defines - * several different file_system_type structures, but they all end up - * using the same ->copy_file_range() function pointer. - */ - if (file_out->f_op->copy_file_range && - file_out->f_op->copy_file_range == file_in->f_op->copy_file_range) - return file_out->f_op->copy_file_range(file_in, pos_in, - file_out, pos_out, - len, flags); - - return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, - flags); -} - /* * Performs necessary checks before doing a file copy * @@ -1427,6 +1405,24 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, if (ret) return ret; + /* + * We allow some filesystems to handle cross sb copy, but passing + * a file of the wrong filesystem type to filesystem driver can result + * in an attempt to dereference the wrong type of ->private_data, so + * avoid doing that until we really have a good reason. + * + * nfs and cifs define several different file_system_type structures + * and several different sets of file_operations, but they all end up + * using the same ->copy_file_range() function pointer. + */ + if (file_out->f_op->copy_file_range) { + if (file_in->f_op->copy_file_range != + file_out->f_op->copy_file_range) + return -EXDEV; + } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) { + return -EXDEV; + } + /* Don't touch certain kinds of inodes */ if (IS_IMMUTABLE(inode_out)) return -EPERM; @@ -1492,26 +1488,41 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, file_start_write(file_out); /* - * Try cloning first, this is supported by more file systems, and - * more efficient if both clone and copy are supported (e.g. NFS). + * Cloning is supported by more file systems, so we implement copy on + * same sb using clone, but for filesystems where both clone and copy + * are supported (e.g. nfs,cifs), we only call the copy method. */ + if (file_out->f_op->copy_file_range) { + ret = file_out->f_op->copy_file_range(file_in, pos_in, + file_out, pos_out, + len, flags); + goto done; + } + if (file_in->f_op->remap_file_range && file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { - loff_t cloned; - - cloned = file_in->f_op->remap_file_range(file_in, pos_in, + ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, min_t(loff_t, MAX_RW_COUNT, len), REMAP_FILE_CAN_SHORTEN); - if (cloned > 0) { - ret = cloned; + if (ret > 0) goto done; - } } - ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len, - flags); - WARN_ON_ONCE(ret == -EOPNOTSUPP); + /* + * We can get here for same sb copy of filesystems that do not implement + * ->copy_file_range() in case filesystem does not support clone or in + * case filesystem supports clone but rejected the clone request (e.g. + * because it was not block aligned). + * + * In both cases, fall back to kernel copy so we are able to maintain a + * consistent story about which filesystems support copy_file_range() + * and which filesystems do not, that will allow userspace tools to + * make consistent desicions w.r.t using copy_file_range(). + */ + ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, + flags); + done: if (ret > 0) { fsnotify_access(file_in); From 7cf7ed8f23c4e451a109b187fa12e071e98b9968 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 20 Jun 2022 13:11:14 +0800 Subject: [PATCH 0200/1056] caif_virtio: fix race between virtio_device_ready() and ndo_open() commit 11a37eb66812ce6a06b79223ad530eb0e1d7294d upstream. We currently depend on probe() calling virtio_device_ready() - which happens after netdev registration. Since ndo_open() can be called immediately after register_netdev, this means there exists a race between ndo_open() and virtio_device_ready(): the driver may start to use the device (e.g. TX) before DRIVER_OK which violates the spec. Fix this by switching to use register_netdevice() and protect the virtio_device_ready() with rtnl_lock() to make sure ndo_open() can only be called after virtio_device_ready(). Fixes: 0d2e1a2926b18 ("caif_virtio: Introduce caif over virtio") Signed-off-by: Jason Wang Message-Id: <20220620051115.3142-3-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/net/caif/caif_virtio.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index 91230894692d27..da87de02b2fcc4 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -721,13 +721,21 @@ static int cfv_probe(struct virtio_device *vdev) /* Carrier is off until netdevice is opened */ netif_carrier_off(netdev); + /* serialize netdev register + virtio_device_ready() with ndo_open() */ + rtnl_lock(); + /* register Netdev */ - err = register_netdev(netdev); + err = register_netdevice(netdev); if (err) { + rtnl_unlock(); dev_err(&vdev->dev, "Unable to register netdev (%d)\n", err); goto err; } + virtio_device_ready(vdev); + + rtnl_unlock(); + debugfs_init(cfv); return 0; From 01121e39ef537289926ae6f5374dce92c796d863 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 26 May 2022 12:28:56 +0400 Subject: [PATCH 0201/1056] PM / devfreq: exynos-ppmu: Fix refcount leak in of_get_devfreq_events commit f44b799603a9b5d2e375b0b2d54dd0b791eddfc2 upstream. of_get_child_by_name() returns a node pointer with refcount incremented, we should use of_node_put() on it when done. This function only calls of_node_put() in normal path, missing it in error paths. Add missing of_node_put() to avoid refcount leak. Fixes: f262f28c1470 ("PM / devfreq: event: Add devfreq_event class") Signed-off-by: Miaoqian Lin Signed-off-by: Chanwoo Choi Signed-off-by: Greg Kroah-Hartman --- drivers/devfreq/event/exynos-ppmu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index 17ed980d909986..d6da9c3e310673 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -514,15 +514,19 @@ static int of_get_devfreq_events(struct device_node *np, count = of_get_child_count(events_np); desc = devm_kcalloc(dev, count, sizeof(*desc), GFP_KERNEL); - if (!desc) + if (!desc) { + of_node_put(events_np); return -ENOMEM; + } info->num_events = count; of_id = of_match_device(exynos_ppmu_id_match, dev); if (of_id) info->ppmu_type = (enum exynos_ppmu_type)of_id->data; - else + else { + of_node_put(events_np); return -EINVAL; + } j = 0; for_each_child_of_node(events_np, node) { From 10f88306f9f36b8ffcbc11ce57f9afde191128bc Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 13 Jun 2022 10:59:57 +0300 Subject: [PATCH 0202/1056] vdpa/mlx5: Update Control VQ callback information commit 40f2f3e94178d45e4ee6078effba2dfc76f6f5ba upstream. The control VQ specific information is stored in the dedicated struct mlx5_control_vq. When the callback is updated through mlx5_vdpa_set_vq_cb(), make sure to update the control VQ struct. Fixes: 5262912ef3cf ("vdpa/mlx5: Add support for control VQ and MAC setting") Signed-off-by: Eli Cohen Message-Id: <20220613075958.511064-1-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index e4258f40dcd746..174895372e7f38 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1698,6 +1698,8 @@ static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_c struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); ndev->event_cbs[idx] = *cb; + if (is_ctrl_vq_idx(mvdev, idx)) + mvdev->cvq.event_cb = *cb; } static void mlx5_cvq_notify(struct vringh *vring) From a6c5c65f4c371f89bb60b3b044f136f9131afd8a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 14 Jun 2022 02:09:00 +0900 Subject: [PATCH 0203/1056] s390: remove unneeded 'select BUILD_BIN2C' commit 25deecb21c18ee29e3be8ac6177b2a9504c33d2d upstream. Since commit 4c0f032d4963 ("s390/purgatory: Omit use of bin2c"), s390 builds the purgatory without using bin2c. Remove 'select BUILD_BIN2C' to avoid the unneeded build of bin2c. Fixes: 4c0f032d4963 ("s390/purgatory: Omit use of bin2c") Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20220613170902.1775211-1-masahiroy@kernel.org Signed-off-by: Alexander Gordeev Signed-off-by: Greg Kroah-Hartman --- arch/s390/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 797041b5109a2e..e402fa964f2353 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -516,7 +516,6 @@ config KEXEC config KEXEC_FILE bool "kexec file based system call" select KEXEC_CORE - select BUILD_BIN2C depends on CRYPTO depends on CRYPTO_SHA256 depends on CRYPTO_SHA256_S390 From 031561caa38a67cbf3084c38b8ebed52bbc1e602 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 21 Jun 2022 14:01:41 +0200 Subject: [PATCH 0204/1056] netfilter: nft_dynset: restore set element counter when failing to update commit 05907f10e235680cc7fb196810e4ad3215d5e648 upstream. This patch fixes a race condition. nft_rhash_update() might fail for two reasons: - Element already exists in the hashtable. - Another packet won race to insert an entry in the hashtable. In both cases, new() has already bumped the counter via atomic_add_unless(), therefore, decrement the set element counter. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_set_hash.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index df40314de21f51..76de6c8d986550 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -143,6 +143,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, /* Another cpu may race to insert the element with the same key */ if (prev) { nft_set_elem_destroy(set, he, true); + atomic_dec(&set->nelems); he = prev; } @@ -152,6 +153,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, err2: nft_set_elem_destroy(set, he, true); + atomic_dec(&set->nelems); err1: return false; } From 1d776f085006a4440c21af2a9a66202a6d577b03 Mon Sep 17 00:00:00 2001 From: Liang He Date: Wed, 22 Jun 2022 12:06:21 +0800 Subject: [PATCH 0205/1056] net/dsa/hirschmann: Add missing of_node_get() in hellcreek_led_setup() commit 16d584d2fc8f4ea36203af45a76becd7093586f1 upstream. of_find_node_by_name() will decrease the refcount of its first arg and we need a of_node_get() to keep refcount balance. Fixes: 7d9ee2e8ff15 ("net: dsa: hellcreek: Add PTP status LEDs") Signed-off-by: Liang He Link: https://lore.kernel.org/r/20220622040621.4094304-1-windhl@126.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/hirschmann/hellcreek_ptp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/hirschmann/hellcreek_ptp.c b/drivers/net/dsa/hirschmann/hellcreek_ptp.c index 2572c6087bb5a1..b28baab6d56a17 100644 --- a/drivers/net/dsa/hirschmann/hellcreek_ptp.c +++ b/drivers/net/dsa/hirschmann/hellcreek_ptp.c @@ -300,6 +300,7 @@ static int hellcreek_led_setup(struct hellcreek *hellcreek) const char *label, *state; int ret = -EINVAL; + of_node_get(hellcreek->dev->of_node); leds = of_find_node_by_name(hellcreek->dev->of_node, "leds"); if (!leds) { dev_err(hellcreek->dev, "No LEDs specified in device tree!\n"); From ed303cc7aab93e4a07e1e8eb556a647173a70955 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Thu, 23 Jun 2022 11:07:41 -0300 Subject: [PATCH 0206/1056] net/sched: act_api: Notify user space if any actions were flushed before error commit 76b39b94382f9e0a639e1c70c3253de248cc4c83 upstream. If during an action flush operation one of the actions is still being referenced, the flush operation is aborted and the kernel returns to user space with an error. However, if the kernel was able to flush, for example, 3 actions and failed on the fourth, the kernel will not notify user space that it deleted 3 actions before failing. This patch fixes that behaviour by notifying user space of how many actions were deleted before flush failed and by setting extack with a message describing what happened. Fixes: 55334a5db5cd ("net_sched: act: refuse to remove bound action outside") Signed-off-by: Victor Nogueira Acked-by: Jamal Hadi Salim Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/act_api.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 7d53272727bfab..d775676956bf93 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -350,7 +350,8 @@ static int tcf_idr_release_unsafe(struct tc_action *p) } static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct nlattr *nest; int n_i = 0; @@ -366,20 +367,25 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; + ret = 0; mutex_lock(&idrinfo->lock); idr_for_each_entry_ul(idr, p, tmp, id) { if (IS_ERR(p)) continue; ret = tcf_idr_release_unsafe(p); - if (ret == ACT_P_DELETED) { + if (ret == ACT_P_DELETED) module_put(ops->owner); - n_i++; - } else if (ret < 0) { - mutex_unlock(&idrinfo->lock); - goto nla_put_failure; - } + else if (ret < 0) + break; + n_i++; } mutex_unlock(&idrinfo->lock); + if (ret < 0) { + if (n_i) + NL_SET_ERR_MSG(extack, "Unable to flush all TC actions"); + else + goto nla_put_failure; + } ret = nla_put_u32(skb, TCA_FCNT, n_i); if (ret) @@ -400,7 +406,7 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct tcf_idrinfo *idrinfo = tn->idrinfo; if (type == RTM_DELACTION) { - return tcf_del_walker(idrinfo, skb, ops); + return tcf_del_walker(idrinfo, skb, ops, extack); } else if (type == RTM_GETACTION) { return tcf_dump_walker(idrinfo, skb, cb); } else { From efafb28ff39f7989ee27e485721025e8ed86182c Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 Jun 2022 09:51:38 +0200 Subject: [PATCH 0207/1056] net: asix: fix "can't send until first packet is send" issue commit 805206e66fab4ba1e0ebd19402006d62cd1d4902 upstream. If cable is attached after probe sequence, the usbnet framework would not automatically start processing RX packets except at least one packet was transmitted. On systems with any kind of address auto configuration this issue was not detected, because some packets are send immediately after link state is changed to "running". With this patch we will notify usbnet about link status change provided by the PHYlib. Fixes: e532a096be0e ("net: usb: asix: ax88772: add phylib support") Reported-by: Anton Lundin Signed-off-by: Oleksij Rempel Tested-by: Anton Lundin Link: https://lore.kernel.org/r/20220624075139.3139300-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/asix_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index f39188b7717ae8..00c23f1d1c9467 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -431,6 +431,7 @@ void asix_adjust_link(struct net_device *netdev) asix_write_medium_mode(dev, mode, 0); phy_print_status(phydev); + usbnet_link_change(dev, phydev->link, 0); } int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm) From 0d139145cc0fb523c17e818794a59d9e265ae8ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Jun 2022 10:28:13 +0000 Subject: [PATCH 0208/1056] net: bonding: fix possible NULL deref in rlb code commit ab84db251c04d38b8dc7ee86e13d4050bedb1c88 upstream. syzbot has two reports involving the same root cause. bond_alb_initialize() must not set bond->alb_info.rlb_enabled if a memory allocation error is detected. Report 1: general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 0 PID: 12276 Comm: kworker/u4:10 Not tainted 5.19.0-rc3-syzkaller-00132-g3b89b511ea0c #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net RIP: 0010:rlb_clear_slave+0x10e/0x690 drivers/net/bonding/bond_alb.c:393 Code: 8e fc 83 fb ff 0f 84 74 02 00 00 e8 cc 2a 8e fc 48 8b 44 24 08 89 dd 48 c1 e5 06 4c 8d 34 28 49 8d 7e 14 48 89 f8 48 c1 e8 03 <42> 0f b6 14 20 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 RSP: 0018:ffffc90018a8f678 EFLAGS: 00010203 RAX: 0000000000000002 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88803375bb00 RSI: ffffffff84ec4ac4 RDI: 0000000000000014 RBP: 0000000000000000 R08: 0000000000000005 R09: 00000000ffffffff R10: 0000000000000000 R11: 0000000000000000 R12: dffffc0000000000 R13: ffff8880ac889000 R14: 0000000000000000 R15: ffff88815a668c80 FS: 0000000000000000(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005597077e10b0 CR3: 0000000026668000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: bond_alb_deinit_slave+0x43c/0x6b0 drivers/net/bonding/bond_alb.c:1663 __bond_release_one.cold+0x383/0xd53 drivers/net/bonding/bond_main.c:2370 bond_slave_netdev_event drivers/net/bonding/bond_main.c:3778 [inline] bond_netdev_event+0x993/0xad0 drivers/net/bonding/bond_main.c:3889 notifier_call_chain+0xb5/0x200 kernel/notifier.c:87 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1945 call_netdevice_notifiers_extack net/core/dev.c:1983 [inline] call_netdevice_notifiers net/core/dev.c:1997 [inline] unregister_netdevice_many+0x948/0x18b0 net/core/dev.c:10839 default_device_exit_batch+0x449/0x590 net/core/dev.c:11333 ops_exit_list+0x125/0x170 net/core/net_namespace.c:167 cleanup_net+0x4ea/0xb00 net/core/net_namespace.c:594 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:302 Report 2: general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] CPU: 1 PID: 5206 Comm: syz-executor.1 Not tainted 5.18.0-syzkaller-12108-g58f9d52ff689 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:rlb_req_update_slave_clients+0x109/0x2f0 drivers/net/bonding/bond_alb.c:502 Code: 5d 18 8f fc 41 80 3e 00 0f 85 a5 01 00 00 89 d8 48 c1 e0 06 49 03 84 24 68 01 00 00 48 8d 78 30 49 89 c7 48 89 fa 48 c1 ea 03 <80> 3c 2a 00 0f 85 98 01 00 00 4d 39 6f 30 75 83 e8 22 18 8f fc 49 RSP: 0018:ffffc9000300ee80 EFLAGS: 00010206 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffc90016c11000 RDX: 0000000000000006 RSI: ffffffff84eb6bf3 RDI: 0000000000000030 RBP: dffffc0000000000 R08: 0000000000000005 R09: 00000000ffffffff R10: 0000000000000000 R11: 0000000000000000 R12: ffff888027c80c80 R13: ffff88807d7ff800 R14: ffffed1004f901bd R15: 0000000000000000 FS: 00007f6f46c58700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020010000 CR3: 00000000516cc000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: alb_fasten_mac_swap+0x886/0xa80 drivers/net/bonding/bond_alb.c:1070 bond_alb_handle_active_change+0x624/0x1050 drivers/net/bonding/bond_alb.c:1765 bond_change_active_slave+0xfa1/0x29b0 drivers/net/bonding/bond_main.c:1173 bond_select_active_slave+0x23f/0xa50 drivers/net/bonding/bond_main.c:1253 bond_enslave+0x3b34/0x53b0 drivers/net/bonding/bond_main.c:2159 do_set_master+0x1c8/0x220 net/core/rtnetlink.c:2577 rtnl_newlink_create net/core/rtnetlink.c:3380 [inline] __rtnl_newlink+0x13ac/0x17e0 net/core/rtnetlink.c:3580 rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3593 rtnetlink_rcv_msg+0x43a/0xc90 net/core/rtnetlink.c:6089 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2492 ___sys_sendmsg+0xf3/0x170 net/socket.c:2546 __sys_sendmsg net/socket.c:2575 [inline] __do_sys_sendmsg net/socket.c:2584 [inline] __se_sys_sendmsg net/socket.c:2582 [inline] __x64_sys_sendmsg+0x132/0x220 net/socket.c:2582 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f6f45a89109 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f6f46c58168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f6f45b9c030 RCX: 00007f6f45a89109 RDX: 0000000000000000 RSI: 0000000020000080 RDI: 0000000000000006 RBP: 00007f6f45ae308d R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffed99029af R14: 00007f6f46c58300 R15: 0000000000022000 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Acked-by: Jay Vosburgh Link: https://lore.kernel.org/r/20220627102813.126264-1-edumazet@google.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_alb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index bca36be884b8d3..a6a70b872ac4a0 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1281,12 +1281,12 @@ int bond_alb_initialize(struct bonding *bond, int rlb_enabled) return res; if (rlb_enabled) { - bond->alb_info.rlb_enabled = 1; res = rlb_initialize(bond); if (res) { tlb_deinitialize(bond); return res; } + bond->alb_info.rlb_enabled = 1; } else { bond->alb_info.rlb_enabled = 0; } From 7227bc7bd10358b45a832ae9d0a981f564230103 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 28 Jun 2022 13:43:49 +0200 Subject: [PATCH 0209/1056] net: phy: ax88772a: fix lost pause advertisement configuration commit fa152f626b24ec2ca3489100d8c5c0a0bce4e2ef upstream. In case of asix_ax88772a_link_change_notify() workaround, we run soft reset which will automatically clear MII_ADVERTISE configuration. The PHYlib framework do not know about changed configuration state of the PHY, so we need use phy_init_hw() to reinit PHY configuration. Fixes: dde258469257 ("net: usb/phy: asix: add support for ax88772A/C PHYs") Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220628114349.3929928-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/ax88796b.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/ax88796b.c b/drivers/net/phy/ax88796b.c index 45789633750553..0f1e617a26c915 100644 --- a/drivers/net/phy/ax88796b.c +++ b/drivers/net/phy/ax88796b.c @@ -88,8 +88,10 @@ static void asix_ax88772a_link_change_notify(struct phy_device *phydev) /* Reset PHY, otherwise MII_LPA will provide outdated information. * This issue is reproducible only with some link partner PHYs */ - if (phydev->state == PHY_NOLINK && phydev->drv->soft_reset) - phydev->drv->soft_reset(phydev); + if (phydev->state == PHY_NOLINK) { + phy_init_hw(phydev); + phy_start_aneg(phydev); + } } static struct phy_driver asix_driver[] = { From ef0af7d08d26c5333ff4944a559279464edf6f15 Mon Sep 17 00:00:00 2001 From: Yevhen Orlov Date: Wed, 29 Jun 2022 04:29:14 +0300 Subject: [PATCH 0210/1056] net: bonding: fix use-after-free after 802.3ad slave unbind commit 050133e1aa2cb49bb17be847d48a4431598ef562 upstream. commit 0622cab0341c ("bonding: fix 802.3ad aggregator reselection"), resolve case, when there is several aggregation groups in the same bond. bond_3ad_unbind_slave will invalidate (clear) aggregator when __agg_active_ports return zero. So, ad_clear_agg can be executed even, when num_of_ports!=0. Than bond_3ad_unbind_slave can be executed again for, previously cleared aggregator. NOTE: at this time bond_3ad_unbind_slave will not update slave ports list, because lag_ports==NULL. So, here we got slave ports, pointing to freed aggregator memory. Fix with checking actual number of ports in group (as was before commit 0622cab0341c ("bonding: fix 802.3ad aggregator reselection") ), before ad_clear_agg(). The KASAN logs are as follows: [ 767.617392] ================================================================== [ 767.630776] BUG: KASAN: use-after-free in bond_3ad_state_machine_handler+0x13dc/0x1470 [ 767.638764] Read of size 2 at addr ffff00011ba9d430 by task kworker/u8:7/767 [ 767.647361] CPU: 3 PID: 767 Comm: kworker/u8:7 Tainted: G O 5.15.11 #15 [ 767.655329] Hardware name: DNI AmazonGo1 A7040 board (DT) [ 767.660760] Workqueue: lacp_1 bond_3ad_state_machine_handler [ 767.666468] Call trace: [ 767.668930] dump_backtrace+0x0/0x2d0 [ 767.672625] show_stack+0x24/0x30 [ 767.675965] dump_stack_lvl+0x68/0x84 [ 767.679659] print_address_description.constprop.0+0x74/0x2b8 [ 767.685451] kasan_report+0x1f0/0x260 [ 767.689148] __asan_load2+0x94/0xd0 [ 767.692667] bond_3ad_state_machine_handler+0x13dc/0x1470 Fixes: 0622cab0341c ("bonding: fix 802.3ad aggregator reselection") Co-developed-by: Maksym Glubokiy Signed-off-by: Maksym Glubokiy Signed-off-by: Yevhen Orlov Acked-by: Jay Vosburgh Link: https://lore.kernel.org/r/20220629012914.361-1-yevhen.orlov@plvision.eu Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_3ad.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index a86b1f71762ea4..d7fb33c078e81a 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2228,7 +2228,8 @@ void bond_3ad_unbind_slave(struct slave *slave) temp_aggregator->num_of_ports--; if (__agg_active_ports(temp_aggregator) == 0) { select_new_active_agg = temp_aggregator->is_active; - ad_clear_agg(temp_aggregator); + if (temp_aggregator->num_of_ports == 0) + ad_clear_agg(temp_aggregator); if (select_new_active_agg) { slave_info(bond->dev, slave->dev, "Removing an active aggregator\n"); /* select new active aggregator */ From 89296ac435e2cf8a5101f7fab8f0c7b754b92052 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 29 Jun 2022 10:39:25 +0530 Subject: [PATCH 0211/1056] powerpc/memhotplug: Add add_pages override for PPC commit ac790d09885d36143076e7e02825c541e8eee899 upstream. With commit ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit") the kernel now validate the addr against high_memory value. This results in the below BUG_ON with dax pfns. [ 635.798741][T26531] kernel BUG at mm/page_alloc.c:5521! 1:mon> e cpu 0x1: Vector: 700 (Program Check) at [c000000007287630] pc: c00000000055ed48: free_pages.part.0+0x48/0x110 lr: c00000000053ca70: tlb_finish_mmu+0x80/0xd0 sp: c0000000072878d0 msr: 800000000282b033 current = 0xc00000000afabe00 paca = 0xc00000037ffff300 irqmask: 0x03 irq_happened: 0x05 pid = 26531, comm = 50-landscape-sy kernel BUG at :5521! Linux version 5.19.0-rc3-14659-g4ec05be7c2e1 (kvaneesh@ltc-boston8) (gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #625 SMP Thu Jun 23 00:35:43 CDT 2022 1:mon> t [link register ] c00000000053ca70 tlb_finish_mmu+0x80/0xd0 [c0000000072878d0] c00000000053ca54 tlb_finish_mmu+0x64/0xd0 (unreliable) [c000000007287900] c000000000539424 exit_mmap+0xe4/0x2a0 [c0000000072879e0] c00000000019fc1c mmput+0xcc/0x210 [c000000007287a20] c000000000629230 begin_new_exec+0x5e0/0xf40 [c000000007287ae0] c00000000070b3cc load_elf_binary+0x3ac/0x1e00 [c000000007287c10] c000000000627af0 bprm_execve+0x3b0/0xaf0 [c000000007287cd0] c000000000628414 do_execveat_common.isra.0+0x1e4/0x310 [c000000007287d80] c00000000062858c sys_execve+0x4c/0x60 [c000000007287db0] c00000000002c1b0 system_call_exception+0x160/0x2c0 [c000000007287e10] c00000000000c53c system_call_common+0xec/0x250 The fix is to make sure we update high_memory on memory hotplug. This is similar to what x86 does in commit 3072e413e305 ("mm/memory_hotplug: introduce add_pages") Fixes: ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit") Signed-off-by: Aneesh Kumar K.V Reviewed-by: Kefeng Wang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220629050925.31447-1-aneesh.kumar@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Kconfig | 4 ++++ arch/powerpc/mm/mem.c | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 941618a7f7ff8d..27222b75d2a4b8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -353,6 +353,10 @@ config ARCH_SUSPEND_NONZERO_CPU def_bool y depends on PPC_POWERNV || PPC_PSERIES +config ARCH_HAS_ADD_PAGES + def_bool y + depends on ARCH_ENABLE_MEMORY_HOTPLUG + config PPC_DCR_NATIVE bool diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 543a044560e935..6902f453c7451f 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -104,6 +104,37 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size) vm_unmap_aliases(); } +/* + * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need + * updating. + */ +static void update_end_of_memory_vars(u64 start, u64 size) +{ + unsigned long end_pfn = PFN_UP(start + size); + + if (end_pfn > max_pfn) { + max_pfn = end_pfn; + max_low_pfn = end_pfn; + high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; + } +} + +int __ref add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct mhp_params *params) +{ + int ret; + + ret = __add_pages(nid, start_pfn, nr_pages, params); + if (ret) + return ret; + + /* update max_pfn, max_low_pfn and high_memory */ + update_end_of_memory_vars(start_pfn << PAGE_SHIFT, + nr_pages << PAGE_SHIFT); + + return ret; +} + int __ref arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) { @@ -114,7 +145,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, rc = arch_create_linear_mapping(nid, start, size, params); if (rc) return rc; - rc = __add_pages(nid, start_pfn, nr_pages, params); + rc = add_pages(nid, start_pfn, nr_pages, params); if (rc) arch_remove_linear_mapping(start, size); return rc; From d13a5b86e284d8abadaa01618f7411439556feb7 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 27 Jun 2022 14:40:48 +0200 Subject: [PATCH 0212/1056] nfc: nfcmrvl: Fix irq_of_parse_and_map() return value commit 5a478a653b4cca148d5c89832f007ec0809d7e6d upstream. The irq_of_parse_and_map() returns 0 on failure, not a negative ERRNO. Reported-by: Lv Ruyi Fixes: caf6e49bf6d0 ("NFC: nfcmrvl: add spi driver") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220627124048.296253-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/nfcmrvl/i2c.c | 6 +++--- drivers/nfc/nfcmrvl/spi.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index ceef81d93ac997..01329b91d59d54 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -167,9 +167,9 @@ static int nfcmrvl_i2c_parse_dt(struct device_node *node, pdata->irq_polarity = IRQF_TRIGGER_RISING; ret = irq_of_parse_and_map(node, 0); - if (ret < 0) { - pr_err("Unable to get irq, error: %d\n", ret); - return ret; + if (!ret) { + pr_err("Unable to get irq\n"); + return -EINVAL; } pdata->irq = ret; diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c index 5b833a9a83f80f..abd75779194cd4 100644 --- a/drivers/nfc/nfcmrvl/spi.c +++ b/drivers/nfc/nfcmrvl/spi.c @@ -115,9 +115,9 @@ static int nfcmrvl_spi_parse_dt(struct device_node *node, } ret = irq_of_parse_and_map(node, 0); - if (ret < 0) { - pr_err("Unable to get irq, error: %d\n", ret); - return ret; + if (!ret) { + pr_err("Unable to get irq\n"); + return -EINVAL; } pdata->irq = ret; From eaf7e6fe4b07f979845484f112493a4203ed55e4 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 27 Jun 2022 19:06:42 +0200 Subject: [PATCH 0213/1056] NFC: nxp-nci: Don't issue a zero length i2c_master_read() commit eddd95b9423946aaacb55cac6a9b2cea8ab944fc upstream. There are packets which doesn't have a payload. In that case, the second i2c_master_read() will have a zero length. But because the NFC controller doesn't have any data left, it will NACK the I2C read and -ENXIO will be returned. In case there is no payload, just skip the second i2c master read. Fixes: 6be88670fc59 ("NFC: nxp-nci_i2c: Add I2C support to NXP NCI driver") Signed-off-by: Michael Walle Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/nxp-nci/i2c.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index 7e451c10985df3..e8f3b35afbee4e 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -162,6 +162,9 @@ static int nxp_nci_i2c_nci_read(struct nxp_nci_i2c_phy *phy, skb_put_data(*skb, (void *)&header, NCI_CTRL_HDR_SIZE); + if (!header.plen) + return 0; + r = i2c_master_recv(client, skb_put(*skb, header.plen), header.plen); if (r != header.plen) { nfc_err(&client->dev, From 35fcb2ba35b4d9b592b558c3bcc6e0d90e213588 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 24 Jun 2022 12:24:31 -0400 Subject: [PATCH 0214/1056] tipc: move bc link creation back to tipc_node_create commit cb8092d70a6f5f01ec1490fce4d35efed3ed996c upstream. Shuang Li reported a NULL pointer dereference crash: [] BUG: kernel NULL pointer dereference, address: 0000000000000068 [] RIP: 0010:tipc_link_is_up+0x5/0x10 [tipc] [] Call Trace: [] [] tipc_bcast_rcv+0xa2/0x190 [tipc] [] tipc_node_bc_rcv+0x8b/0x200 [tipc] [] tipc_rcv+0x3af/0x5b0 [tipc] [] tipc_udp_recv+0xc7/0x1e0 [tipc] It was caused by the 'l' passed into tipc_bcast_rcv() is NULL. When it creates a node in tipc_node_check_dest(), after inserting the new node into hashtable in tipc_node_create(), it creates the bc link. However, there is a gap between this insert and bc link creation, a bc packet may come in and get the node from the hashtable then try to dereference its bc link, which is NULL. This patch is to fix it by moving the bc link creation before inserting into the hashtable. Note that for a preliminary node becoming "real", the bc link creation should also be called before it's rehashed, as we don't create it for preliminary nodes. Fixes: 4cbf8ac2fe5a ("tipc: enable creating a "preliminary" node") Reported-by: Shuang Li Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/node.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 6ef95ce565bd37..b48d97cbbe29c5 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -472,8 +472,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, bool preliminary) { struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l, *snd_l = tipc_bc_sndlink(net); struct tipc_node *n, *temp_node; - struct tipc_link *l; unsigned long intv; int bearer_id; int i; @@ -488,6 +488,16 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, goto exit; /* A preliminary node becomes "real" now, refresh its data */ tipc_node_write_lock(n); + if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, + tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), + n->capabilities, &n->bc_entry.inputq1, + &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { + pr_warn("Broadcast rcv link refresh failed, no memory\n"); + tipc_node_write_unlock_fast(n); + tipc_node_put(n); + n = NULL; + goto exit; + } n->preliminary = false; n->addr = addr; hlist_del_rcu(&n->hash); @@ -567,7 +577,16 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, n->signature = INVALID_NODE_SIG; n->active_links[0] = INVALID_BEARER_ID; n->active_links[1] = INVALID_BEARER_ID; - n->bc_entry.link = NULL; + if (!preliminary && + !tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, + tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), + n->capabilities, &n->bc_entry.inputq1, + &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { + pr_warn("Broadcast rcv link creation failed, no memory\n"); + kfree(n); + n = NULL; + goto exit; + } tipc_node_get(n); timer_setup(&n->timer, tipc_node_timeout, 0); /* Start a slow timer anyway, crypto needs it */ @@ -1155,7 +1174,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool *respond, bool *dupl_addr) { struct tipc_node *n; - struct tipc_link *l, *snd_l; + struct tipc_link *l; struct tipc_link_entry *le; bool addr_match = false; bool sign_match = false; @@ -1175,22 +1194,6 @@ void tipc_node_check_dest(struct net *net, u32 addr, return; tipc_node_write_lock(n); - if (unlikely(!n->bc_entry.link)) { - snd_l = tipc_bc_sndlink(net); - if (!tipc_link_bc_create(net, tipc_own_addr(net), - addr, peer_id, U16_MAX, - tipc_link_min_win(snd_l), - tipc_link_max_win(snd_l), - n->capabilities, - &n->bc_entry.inputq1, - &n->bc_entry.namedq, snd_l, - &n->bc_entry.link)) { - pr_warn("Broadcast rcv link creation failed, no mem\n"); - tipc_node_write_unlock_fast(n); - tipc_node_put(n); - return; - } - } le = &n->links[b->identity]; From c8fb40fd7bb7336a353cfaaccfaf8d127636aaa0 Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sun, 26 Jun 2022 21:33:48 -0700 Subject: [PATCH 0215/1056] epic100: fix use after free on rmmod commit 8ee9d82cd0a45e7d050ade598c9f33032a0f2891 upstream. epic_close() calls epic_rx() and uses dma buffer, but in epic_remove_one() we already freed the dma buffer. To fix this issue, reorder function calls like in the .probe function. BUG: KASAN: use-after-free in epic_rx+0xa6/0x7e0 [epic100] Call Trace: epic_rx+0xa6/0x7e0 [epic100] epic_close+0xec/0x2f0 [epic100] unregister_netdev+0x18/0x20 epic_remove_one+0xaa/0xf0 [epic100] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Yilun Wu Signed-off-by: Tong Zhang Reviewed-by: Francois Romieu Link: https://lore.kernel.org/r/20220627043351.25615-1-ztong0001@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/smsc/epic100.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 44daf79a8f9725..f3b1af9a59e8a7 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -1513,14 +1513,14 @@ static void epic_remove_one(struct pci_dev *pdev) struct net_device *dev = pci_get_drvdata(pdev); struct epic_private *ep = netdev_priv(dev); + unregister_netdev(dev); dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma); dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma); - unregister_netdev(dev); pci_iounmap(pdev, ep->ioaddr); - pci_release_regions(pdev); free_netdev(dev); + pci_release_regions(pdev); pci_disable_device(pdev); /* pci_power_off(pdev, -1); */ } From 50fefe57f45e56d6a567273eb5be25778046c941 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 30 Jun 2022 14:42:05 -0600 Subject: [PATCH 0216/1056] io_uring: ensure that send/sendmsg and recv/recvmsg check sqe->ioprio commit 73911426aaaadbae54fa72359b33a7b6a56947db upstream. All other opcodes correctly check if this is set and -EINVAL if it is and they don't support that field, for some reason the these were forgotten. This was unified a bit differently in the upstream tree, but had the same effect as making sure we error on this field. Rather than have a painful backport of the upstream commit, just fixup the mentioned opcodes. Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index a8470a98f84d84..1893237403247d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4788,6 +4788,8 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EINVAL; if (unlikely(sqe->addr2 || sqe->file_index)) return -EINVAL; + if (unlikely(sqe->addr2 || sqe->file_index || sqe->ioprio)) + return -EINVAL; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); @@ -5011,6 +5013,8 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EINVAL; if (unlikely(sqe->addr2 || sqe->file_index)) return -EINVAL; + if (unlikely(sqe->addr2 || sqe->file_index || sqe->ioprio)) + return -EINVAL; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); From b43a47c1c5c76086ceb0d618297f04139e17b857 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 24 Jun 2022 13:23:34 +0200 Subject: [PATCH 0217/1056] ACPI: video: Change how we determine if brightness key-presses are handled commit 3a0cf7ab8df3878a7e2f3d29275b785cf4e7afb6 upstream. Some systems have an ACPI video bus but not ACPI video devices with backlight capability. On these devices brightness key-presses are (logically) not reported through the ACPI video bus. Change how acpi_video_handles_brightness_key_presses() determines if brightness key-presses are handled by the ACPI video driver to avoid vendor specific drivers/platform/x86 drivers filtering out their brightness key-presses even though they are the only ones reporting these presses. Fixes: ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug") Reported-and-tested-by: Stefan Seyfried Reported-and-tested-by: Kenneth Chan Signed-off-by: Hans de Goede Acked-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220624112340.10130-2-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_video.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 42ede059728ce3..007deb3a8ea37a 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -73,6 +73,7 @@ module_param(device_id_scheme, bool, 0444); static int only_lcd = -1; module_param(only_lcd, int, 0444); +static bool has_backlight; static int register_count; static DEFINE_MUTEX(register_count_mutex); static DEFINE_MUTEX(video_list_lock); @@ -1222,6 +1223,9 @@ acpi_video_bus_get_one_device(struct acpi_device *device, acpi_video_device_bind(video, data); acpi_video_device_find_cap(data); + if (data->cap._BCM && data->cap._BCL) + has_backlight = true; + mutex_lock(&video->device_list_lock); list_add_tail(&data->entry, &video->video_device_list); mutex_unlock(&video->device_list_lock); @@ -2251,6 +2255,7 @@ void acpi_video_unregister(void) if (register_count) { acpi_bus_unregister_driver(&acpi_video_bus); register_count = 0; + has_backlight = false; } mutex_unlock(®ister_count_mutex); } @@ -2272,13 +2277,7 @@ void acpi_video_unregister_backlight(void) bool acpi_video_handles_brightness_key_presses(void) { - bool have_video_busses; - - mutex_lock(&video_list_lock); - have_video_busses = !list_empty(&video_bus_head); - mutex_unlock(&video_list_lock); - - return have_video_busses && + return has_backlight && (report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS); } EXPORT_SYMBOL(acpi_video_handles_brightness_key_presses); From 674a641e5b67e16ba3112eacd680ff87b38539de Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Jun 2022 15:30:20 +0000 Subject: [PATCH 0218/1056] tunnels: do not assume mac header is set in skb_tunnel_check_pmtu() commit 853a7614880231747040cada91d2b8d2e995c51a upstream. Recently added debug in commit f9aefd6b2aa3 ("net: warn if mac header was not set") caught a bug in skb_tunnel_check_pmtu(), as shown in this syzbot report [1]. In ndo_start_xmit() paths, there is really no need to use skb->mac_header, because skb->data is supposed to point at it. [1] WARNING: CPU: 1 PID: 8604 at include/linux/skbuff.h:2784 skb_mac_header_len include/linux/skbuff.h:2784 [inline] WARNING: CPU: 1 PID: 8604 at include/linux/skbuff.h:2784 skb_tunnel_check_pmtu+0x5de/0x2f90 net/ipv4/ip_tunnel_core.c:413 Modules linked in: CPU: 1 PID: 8604 Comm: syz-executor.3 Not tainted 5.19.0-rc2-syzkaller-00443-g8720bd951b8e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:skb_mac_header_len include/linux/skbuff.h:2784 [inline] RIP: 0010:skb_tunnel_check_pmtu+0x5de/0x2f90 net/ipv4/ip_tunnel_core.c:413 Code: 00 00 00 00 fc ff df 4c 89 fa 48 c1 ea 03 80 3c 02 00 0f 84 b9 fe ff ff 4c 89 ff e8 7c 0f d7 f9 e9 ac fe ff ff e8 c2 13 8a f9 <0f> 0b e9 28 fc ff ff e8 b6 13 8a f9 48 8b 54 24 70 48 b8 00 00 00 RSP: 0018:ffffc90002e4f520 EFLAGS: 00010212 RAX: 0000000000000324 RBX: ffff88804d5fd500 RCX: ffffc90005b52000 RDX: 0000000000040000 RSI: ffffffff87f05e3e RDI: 0000000000000003 RBP: ffffc90002e4f650 R08: 0000000000000003 R09: 000000000000ffff R10: 000000000000ffff R11: 0000000000000000 R12: 000000000000ffff R13: 0000000000000000 R14: 000000000000ffcd R15: 000000000000001f FS: 00007f3babba9700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000080 CR3: 0000000075319000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: geneve_xmit_skb drivers/net/geneve.c:927 [inline] geneve_xmit+0xcf8/0x35d0 drivers/net/geneve.c:1107 __netdev_start_xmit include/linux/netdevice.h:4805 [inline] netdev_start_xmit include/linux/netdevice.h:4819 [inline] __dev_direct_xmit+0x500/0x730 net/core/dev.c:4309 dev_direct_xmit include/linux/netdevice.h:3007 [inline] packet_direct_xmit+0x1b8/0x2c0 net/packet/af_packet.c:282 packet_snd net/packet/af_packet.c:3073 [inline] packet_sendmsg+0x21f4/0x55d0 net/packet/af_packet.c:3104 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2489 ___sys_sendmsg+0xf3/0x170 net/socket.c:2543 __sys_sendmsg net/socket.c:2572 [inline] __do_sys_sendmsg net/socket.c:2581 [inline] __se_sys_sendmsg net/socket.c:2579 [inline] __x64_sys_sendmsg+0x132/0x220 net/socket.c:2579 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f3baaa89109 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f3babba9168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f3baab9bf60 RCX: 00007f3baaa89109 RDX: 0000000000000000 RSI: 0000000020000a00 RDI: 0000000000000003 RBP: 00007f3baaae305d R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffe74f2543f R14: 00007f3babba9300 R15: 0000000000022000 Fixes: 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Stefano Brivio Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6b2dc7b2b6127d..cc1caab4a65492 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -410,7 +410,7 @@ int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, u32 mtu = dst_mtu(encap_dst) - headroom; if ((skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) || - (!skb_is_gso(skb) && (skb->len - skb_mac_header_len(skb)) <= mtu)) + (!skb_is_gso(skb) && (skb->len - skb_network_offset(skb)) <= mtu)) return 0; skb_dst_update_pmtu_no_confirm(skb, mtu); From d6371303b4db856cc763018bd2e34b2d0f69199f Mon Sep 17 00:00:00 2001 From: katrinzhou Date: Tue, 28 Jun 2022 11:50:30 +0800 Subject: [PATCH 0219/1056] ipv6/sit: fix ipip6_tunnel_get_prl return value commit adabdd8f6acabc0c3fdbba2e7f5a2edd9c5ef22d upstream. When kcalloc fails, ipip6_tunnel_get_prl() should return -ENOMEM. Move the position of label "out" to return correctly. Addresses-Coverity: ("Unused value") Fixes: 300aaeeaab5f ("[IPV6] SIT: Add SIOCGETPRL ioctl to get/dump PRL.") Signed-off-by: katrinzhou Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220628035030.1039171-1-zys.zljxml@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv6/sit.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 626cb53aa57abb..637cd99bd7a641 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -323,8 +323,6 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) : NULL; - rcu_read_lock(); - ca = min(t->prl_count, cmax); if (!kp) { @@ -341,7 +339,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u } } - c = 0; + rcu_read_lock(); for_each_prl_rcu(t->prl) { if (c >= cmax) break; @@ -353,7 +351,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u if (kprl.addr != htonl(INADDR_ANY)) break; } -out: + rcu_read_unlock(); len = sizeof(*kp) * c; @@ -362,7 +360,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u ret = -EFAULT; kfree(kp); - +out: return ret; } From 2930ee1a166df0e12fe9fec4b79cc3dc9634ae45 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Jun 2022 12:12:48 +0000 Subject: [PATCH 0220/1056] ipv6: fix lockdep splat in in6_dump_addrs() commit 4e43e64d0f1332fcc503babad4dc31aead7131ca upstream. As reported by syzbot, we should not use rcu_dereference() when rcu_read_lock() is not held. WARNING: suspicious RCU usage 5.19.0-rc2-syzkaller #0 Not tainted net/ipv6/addrconf.c:5175 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by syz-executor326/3617: #0: ffffffff8d5848e8 (rtnl_mutex){+.+.}-{3:3}, at: netlink_dump+0xae/0xc20 net/netlink/af_netlink.c:2223 stack backtrace: CPU: 0 PID: 3617 Comm: syz-executor326 Not tainted 5.19.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 in6_dump_addrs+0x12d1/0x1790 net/ipv6/addrconf.c:5175 inet6_dump_addr+0x9c1/0xb50 net/ipv6/addrconf.c:5300 netlink_dump+0x541/0xc20 net/netlink/af_netlink.c:2275 __netlink_dump_start+0x647/0x900 net/netlink/af_netlink.c:2380 netlink_dump_start include/linux/netlink.h:245 [inline] rtnetlink_rcv_msg+0x73e/0xc90 net/core/rtnetlink.c:6046 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2492 ___sys_sendmsg+0xf3/0x170 net/socket.c:2546 __sys_sendmsg net/socket.c:2575 [inline] __do_sys_sendmsg net/socket.c:2584 [inline] __se_sys_sendmsg net/socket.c:2582 [inline] __x64_sys_sendmsg+0x132/0x220 net/socket.c:2582 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: 88e2ca308094 ("mld: convert ifmcaddr6 to RCU") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Taehee Yoo Link: https://lore.kernel.org/r/20220628121248.858695-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 89afd7bba33cc3..6dcf034835ecde 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5166,9 +5166,9 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, fillargs->event = RTM_GETMULTICAST; /* multicast address */ - for (ifmca = rcu_dereference(idev->mc_list); + for (ifmca = rtnl_dereference(idev->mc_list); ifmca; - ifmca = rcu_dereference(ifmca->next), ip_idx++) { + ifmca = rtnl_dereference(ifmca->next), ip_idx++) { if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, fillargs); From 9db9e649f88f5eebecbed618746086fbf8452857 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 29 Jun 2022 10:02:05 +0300 Subject: [PATCH 0221/1056] mlxsw: spectrum_router: Fix rollback in tunnel next hop init commit 665030fd0c1ed9f505932e6e73e7a2c788787a0a upstream. In mlxsw_sp_nexthop6_init(), a next hop is always added to the router linked list, and mlxsw_sp_nexthop_type_init() is invoked afterwards. When that function results in an error, the next hop will not have been removed from the linked list. As the error is propagated upwards and the caller frees the next hop object, the linked list ends up holding an invalid object. A similar issue comes up with mlxsw_sp_nexthop4_init(), where rollback block does exist, however does not include the linked list removal. Both IPv6 and IPv4 next hops have a similar issue with next-hop counter rollbacks. As these were introduced in the same patchset as the next hop linked list, include the cleanup in this patch. Fixes: dbe4598c1e92 ("mlxsw: spectrum_router: Keep nexthops in a linked list") Fixes: a5390278a5eb ("mlxsw: spectrum: Add support for setting counters on nexthops") Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Ido Schimmel Link: https://lore.kernel.org/r/20220629070205.803952-1-idosch@nvidia.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 19bb3ca0515e21..d7d90cdce4f6dc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -4293,6 +4293,8 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, return 0; err_nexthop_neigh_init: + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); return err; } @@ -6578,6 +6580,7 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt) { struct net_device *dev = rt->fib6_nh->fib_nh_dev; + int err; nh->nhgi = nh_grp->nhgi; nh->nh_weight = rt->fib6_nh->fib_nh_weight; @@ -6593,7 +6596,16 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, return 0; nh->ifindex = dev->ifindex; - return mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); + err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); + if (err) + goto err_nexthop_type_init; + + return 0; + +err_nexthop_type_init: + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + return err; } static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, From c28f955155037586132f8aad573e06a41d260038 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Jun 2022 11:19:10 -0700 Subject: [PATCH 0222/1056] net: tun: avoid disabling NAPI twice commit ff1fa2081d173b01cebe2fbf0a2d0f1cee9ce4b5 upstream. Eric reports that syzbot made short work out of my speculative fix. Indeed when queue gets detached its tfile->tun remains, so we would try to stop NAPI twice with a detach(), close() sequence. Alternative fix would be to move tun_napi_disable() to tun_detach_all() and let the NAPI run after the queue has been detached. Fixes: a8fc8cb5692a ("net: tun: stop NAPI when detaching queues") Reported-by: syzbot Reported-by: Eric Dumazet Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20220629181911.372047-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index cdeee7f7074938..ea60453fe69aac 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -641,7 +641,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun = rtnl_dereference(tfile->tun); if (tun && clean) { - tun_napi_disable(tfile); + if (!tfile->detached) + tun_napi_disable(tfile); tun_napi_del(tfile); } From 010d7c422296b34d6862f9b34d5a04fea89ea6d8 Mon Sep 17 00:00:00 2001 From: Leah Rumancik Date: Thu, 30 Jun 2022 13:52:28 -0700 Subject: [PATCH 0223/1056] MAINTAINERS: add Leah as xfs maintainer for 5.15.y Update MAINTAINERS for xfs in an effort to help direct bots/questions about xfs in 5.15.y. Note: 5.10.y and 5.4.y will have different updates to their respective MAINTAINERS files for this effort. Suggested-by: Darrick J. Wong Signed-off-by: Leah Rumancik Reviewed-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 393706e85ba27a..a60d7e0466afa3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20579,6 +20579,7 @@ F: drivers/xen/*swiotlb* XFS FILESYSTEM C: irc://irc.oftc.net/xfs +M: Leah Rumancik M: Darrick J. Wong M: linux-xfs@vger.kernel.org L: linux-xfs@vger.kernel.org From e11cdd74519ec2443a3ae66fa921f9ac45a15527 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 05:04:36 +0000 Subject: [PATCH 0224/1056] tcp: add a missing nf_reset_ct() in 3WHS handling commit 6f0012e35160cd08a53e46e3b3bbf724b92dfe68 upstream. When the third packet of 3WHS connection establishment contains payload, it is added into socket receive queue without the XFRM check and the drop of connection tracking context. This means that if the data is left unread in the socket receive queue, conntrack module can not be unloaded. As most applications usually reads the incoming data immediately after accept(), bug has been hiding for quite a long time. Commit 68822bdf76f1 ("net: generalize skb freeing deferral to per-cpu lists") exposed this bug because even if the application reads this data, the skb with nfct state could stay in a per-cpu cache for an arbitrary time, if said cpu no longer process RX softirqs. Many thanks to Ilya Maximets for reporting this issue, and for testing various patches: https://lore.kernel.org/netdev/20220619003919.394622-1-i.maximets@ovn.org/ Note that I also added a missing xfrm4_policy_check() call, although this is probably not a big issue, as the SYN packet should have been dropped earlier. Fixes: b59c270104f0 ("[NETFILTER]: Keep conntrack reference until IPsec policy checks are done") Reported-by: Ilya Maximets Signed-off-by: Eric Dumazet Cc: Florian Westphal Cc: Pablo Neira Ayuso Cc: Steffen Klassert Tested-by: Ilya Maximets Reviewed-by: Ilya Maximets Link: https://lore.kernel.org/r/20220623050436.1290307-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a189625098ba5d..5d94822fd5069f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2014,7 +2014,8 @@ int tcp_v4_rcv(struct sk_buff *skb) struct sock *nsk; sk = req->rsk_listener; - if (unlikely(tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) { + if (unlikely(!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb) || + tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) { sk_drops_add(sk, skb); reqsk_put(req); goto discard_it; @@ -2061,6 +2062,7 @@ int tcp_v4_rcv(struct sk_buff *skb) } goto discard_and_relse; } + nf_reset_ct(skb); if (nsk == sk) { reqsk_put(req); tcp_v4_restore_cb(skb); From 34ec62bc44b03e7376954a9391e654f77c7d99b3 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 14 Jan 2022 22:09:51 +0530 Subject: [PATCH 0225/1056] selftests/bpf: Add test_verifier support to fixup kfunc call insns commit 0201b80772ac2b712bbbfe783cdb731fdfb4247e upstream. This allows us to add tests (esp. negative tests) where we only want to ensure the program doesn't pass through the verifier, and also verify the error. The next commit will add the tests making use of this. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220114163953.1455836-9-memxor@gmail.com Signed-off-by: Alexei Starovoitov [PHLin: backport due to lack of fixup_map_timer] Signed-off-by: Po-Hsu Lin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/bpf/test_verifier.c | 28 +++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3a9e332c5e360e..68a9a897185cdf 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -63,6 +64,11 @@ static bool unpriv_disabled = false; static int skips; static bool verbose = false; +struct kfunc_btf_id_pair { + const char *kfunc; + int insn_idx; +}; + struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; @@ -88,6 +94,7 @@ struct bpf_test { int fixup_map_event_output[MAX_FIXUPS]; int fixup_map_reuseport_array[MAX_FIXUPS]; int fixup_map_ringbuf[MAX_FIXUPS]; + struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS]; /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT. * Can be a tab-separated sequence of expected strings. An empty string * means no log verification. @@ -718,6 +725,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_event_output = test->fixup_map_event_output; int *fixup_map_reuseport_array = test->fixup_map_reuseport_array; int *fixup_map_ringbuf = test->fixup_map_ringbuf; + struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -903,6 +911,26 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_ringbuf++; } while (*fixup_map_ringbuf); } + + /* Patch in kfunc BTF IDs */ + if (fixup_kfunc_btf_id->kfunc) { + struct btf *btf; + int btf_id; + + do { + btf_id = 0; + btf = btf__load_vmlinux_btf(); + if (btf) { + btf_id = btf__find_by_name_kind(btf, + fixup_kfunc_btf_id->kfunc, + BTF_KIND_FUNC); + btf_id = btf_id < 0 ? 0 : btf_id; + } + btf__free(btf); + prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id; + fixup_kfunc_btf_id++; + } while (fixup_kfunc_btf_id->kfunc); + } } struct libcap { From 58f64962a6971796891763b82bdb6a27f18d0191 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 9 Dec 2021 16:19:09 -0700 Subject: [PATCH 0226/1056] selftests/rseq: remove ARRAY_SIZE define from individual tests commit 07ad4f7629d4802ff0d962b0ac23ea6445964e2a upstream. ARRAY_SIZE is defined in several selftests. Remove definitions from individual test files and include header file for the define instead. ARRAY_SIZE define is added in a separate patch to prepare for this change. Remove ARRAY_SIZE from rseq tests and pickup the one defined in kselftest.h. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/basic_percpu_ops_test.c | 3 +-- tools/testing/selftests/rseq/rseq.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c index eb3f6db36d369f..b953a52ff706c8 100644 --- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c +++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c @@ -9,10 +9,9 @@ #include #include +#include "../kselftest.h" #include "rseq.h" -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - struct percpu_lock_entry { intptr_t v; } __attribute__((aligned(128))); diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 7159eb777fd34a..fb440dfca15879 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -27,10 +27,9 @@ #include #include +#include "../kselftest.h" #include "rseq.h" -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - __thread volatile struct rseq __rseq_abi = { .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, }; From 94a913fe62713171ade8a49c18c28571de2f7782 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 24 Jan 2022 12:12:39 -0500 Subject: [PATCH 0227/1056] selftests/rseq: introduce own copy of rseq uapi header commit 5c105d55a9dc9e01535116ccfc26e703168a574f upstream. The Linux kernel rseq uapi header has a broken layout for the rseq_cs.ptr field on 32-bit little endian architectures. The entire rseq_cs.ptr field is planned for removal, leaving only the 64-bit rseq_cs.ptr64 field available. Both glibc and librseq use their own copy of the Linux kernel uapi header, where they introduce proper union fields to access to the 32-bit low order bits of the rseq_cs pointer on 32-bit architectures. Introduce a copy of the Linux kernel uapi headers in the Linux kernel selftests. Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220124171253.22072-2-mathieu.desnoyers@efficios.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/rseq-abi.h | 151 ++++++++++++++++++++++++ tools/testing/selftests/rseq/rseq.c | 14 +-- tools/testing/selftests/rseq/rseq.h | 10 +- 3 files changed, 161 insertions(+), 14 deletions(-) create mode 100644 tools/testing/selftests/rseq/rseq-abi.h diff --git a/tools/testing/selftests/rseq/rseq-abi.h b/tools/testing/selftests/rseq/rseq-abi.h new file mode 100644 index 00000000000000..a8c44d9af71fb8 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-abi.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +#ifndef _RSEQ_ABI_H +#define _RSEQ_ABI_H + +/* + * rseq-abi.h + * + * Restartable sequences system call API + * + * Copyright (c) 2015-2022 Mathieu Desnoyers + */ + +#include +#include + +enum rseq_abi_cpu_id_state { + RSEQ_ABI_CPU_ID_UNINITIALIZED = -1, + RSEQ_ABI_CPU_ID_REGISTRATION_FAILED = -2, +}; + +enum rseq_abi_flags { + RSEQ_ABI_FLAG_UNREGISTER = (1 << 0), +}; + +enum rseq_abi_cs_flags_bit { + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, +}; + +enum rseq_abi_cs_flags { + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT = + (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT), + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL = + (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT), + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE = + (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT), +}; + +/* + * struct rseq_abi_cs is aligned on 4 * 8 bytes to ensure it is always + * contained within a single cache-line. It is usually declared as + * link-time constant data. + */ +struct rseq_abi_cs { + /* Version of this structure. */ + __u32 version; + /* enum rseq_abi_cs_flags */ + __u32 flags; + __u64 start_ip; + /* Offset from start_ip. */ + __u64 post_commit_offset; + __u64 abort_ip; +} __attribute__((aligned(4 * sizeof(__u64)))); + +/* + * struct rseq_abi is aligned on 4 * 8 bytes to ensure it is always + * contained within a single cache-line. + * + * A single struct rseq_abi per thread is allowed. + */ +struct rseq_abi { + /* + * Restartable sequences cpu_id_start field. Updated by the + * kernel. Read by user-space with single-copy atomicity + * semantics. This field should only be read by the thread which + * registered this data structure. Aligned on 32-bit. Always + * contains a value in the range of possible CPUs, although the + * value may not be the actual current CPU (e.g. if rseq is not + * initialized). This CPU number value should always be compared + * against the value of the cpu_id field before performing a rseq + * commit or returning a value read from a data structure indexed + * using the cpu_id_start value. + */ + __u32 cpu_id_start; + /* + * Restartable sequences cpu_id field. Updated by the kernel. + * Read by user-space with single-copy atomicity semantics. This + * field should only be read by the thread which registered this + * data structure. Aligned on 32-bit. Values + * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED + * have a special semantic: the former means "rseq uninitialized", + * and latter means "rseq initialization failed". This value is + * meant to be read within rseq critical sections and compared + * with the cpu_id_start value previously read, before performing + * the commit instruction, or read and compared with the + * cpu_id_start value before returning a value loaded from a data + * structure indexed using the cpu_id_start value. + */ + __u32 cpu_id; + /* + * Restartable sequences rseq_cs field. + * + * Contains NULL when no critical section is active for the current + * thread, or holds a pointer to the currently active struct rseq_cs. + * + * Updated by user-space, which sets the address of the currently + * active rseq_cs at the beginning of assembly instruction sequence + * block, and set to NULL by the kernel when it restarts an assembly + * instruction sequence block, as well as when the kernel detects that + * it is preempting or delivering a signal outside of the range + * targeted by the rseq_cs. Also needs to be set to NULL by user-space + * before reclaiming memory that contains the targeted struct rseq_cs. + * + * Read and set by the kernel. Set by user-space with single-copy + * atomicity semantics. This field should only be updated by the + * thread which registered this data structure. Aligned on 64-bit. + */ + union { + __u64 ptr64; + + /* + * The "arch" field provides architecture accessor for + * the ptr field based on architecture pointer size and + * endianness. + */ + struct { +#ifdef __LP64__ + __u64 ptr; +#elif defined(__BYTE_ORDER) ? (__BYTE_ORDER == __BIG_ENDIAN) : defined(__BIG_ENDIAN) + __u32 padding; /* Initialized to zero. */ + __u32 ptr; +#else + __u32 ptr; + __u32 padding; /* Initialized to zero. */ +#endif + } arch; + } rseq_cs; + + /* + * Restartable sequences flags field. + * + * This field should only be updated by the thread which + * registered this data structure. Read by the kernel. + * Mainly used for single-stepping through rseq critical sections + * with debuggers. + * + * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT + * Inhibit instruction sequence block restart on preemption + * for this thread. + * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL + * Inhibit instruction sequence block restart on signal + * delivery for this thread. + * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE + * Inhibit instruction sequence block restart on migration for + * this thread. + */ + __u32 flags; +} __attribute__((aligned(4 * sizeof(__u64)))); + +#endif /* _RSEQ_ABI_H */ diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index fb440dfca15879..bfe1b2692ffc00 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -30,8 +30,8 @@ #include "../kselftest.h" #include "rseq.h" -__thread volatile struct rseq __rseq_abi = { - .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, +__thread volatile struct rseq_abi __rseq_abi = { + .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, }; /* @@ -66,7 +66,7 @@ static void signal_restore(sigset_t oldset) abort(); } -static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, +static int sys_rseq(volatile struct rseq_abi *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig) { return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); @@ -86,13 +86,13 @@ int rseq_register_current_thread(void) } if (__rseq_refcount++) goto end; - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), 0, RSEQ_SIG); if (!rc) { assert(rseq_current_cpu_raw() >= 0); goto end; } if (errno != EBUSY) - __rseq_abi.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED; + __rseq_abi.cpu_id = RSEQ_ABI_CPU_ID_REGISTRATION_FAILED; ret = -1; __rseq_refcount--; end: @@ -114,8 +114,8 @@ int rseq_unregister_current_thread(void) } if (--__rseq_refcount) goto end; - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), - RSEQ_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), + RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (!rc) goto end; __rseq_refcount = 1; diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index 3f63eb362b92fb..cb6bbc53b5861d 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include "rseq-abi.h" /* * Empty code injection macros, override when testing. @@ -43,7 +43,7 @@ #define RSEQ_INJECT_FAILED #endif -extern __thread volatile struct rseq __rseq_abi; +extern __thread volatile struct rseq_abi __rseq_abi; extern int __rseq_handled; #define rseq_likely(x) __builtin_expect(!!(x), 1) @@ -139,11 +139,7 @@ static inline uint32_t rseq_current_cpu(void) static inline void rseq_clear_rseq_cs(void) { -#ifdef __LP64__ - __rseq_abi.rseq_cs.ptr = 0; -#else - __rseq_abi.rseq_cs.ptr.ptr32 = 0; -#endif + __rseq_abi.rseq_cs.arch.ptr = 0; } /* From 3c35d9cbd99b52dde0019a3b09359f23876b02a7 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 24 Jan 2022 12:12:41 -0500 Subject: [PATCH 0228/1056] selftests/rseq: Remove useless assignment to cpu variable commit 930378d056eac2c96407b02aafe4938d0ac9cc37 upstream. Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220124171253.22072-4-mathieu.desnoyers@efficios.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/param_test.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 699ad5f93c34fb..cc2cfc1da9383f 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -368,9 +368,7 @@ void *test_percpu_spinlock_thread(void *arg) abort(); reps = thread_data->reps; for (i = 0; i < reps; i++) { - int cpu = rseq_cpu_start(); - - cpu = rseq_this_cpu_lock(&data->lock); + int cpu = rseq_this_cpu_lock(&data->lock); data->c[cpu].count++; rseq_percpu_unlock(&data->lock, cpu); #ifndef BENCHMARK From c9a96b4231c32a23e0ac70f84da05590c62dd68e Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 24 Jan 2022 12:12:42 -0500 Subject: [PATCH 0229/1056] selftests/rseq: Remove volatile from __rseq_abi commit 94b80a19ebfe347a01301d750040a61c38200e2b upstream. This is done in preparation for the selftest uplift to become compatible with glibc-2.35. All accesses to the __rseq_abi fields are volatile, but remove the volatile from the TLS variable declaration, otherwise we are stuck with volatile for the upcoming rseq_get_abi() helper. Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220124171253.22072-5-mathieu.desnoyers@efficios.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/rseq.c | 4 ++-- tools/testing/selftests/rseq/rseq.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index bfe1b2692ffc00..1f905b60728a35 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -30,7 +30,7 @@ #include "../kselftest.h" #include "rseq.h" -__thread volatile struct rseq_abi __rseq_abi = { +__thread struct rseq_abi __rseq_abi = { .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, }; @@ -92,7 +92,7 @@ int rseq_register_current_thread(void) goto end; } if (errno != EBUSY) - __rseq_abi.cpu_id = RSEQ_ABI_CPU_ID_REGISTRATION_FAILED; + RSEQ_WRITE_ONCE(__rseq_abi.cpu_id, RSEQ_ABI_CPU_ID_REGISTRATION_FAILED); ret = -1; __rseq_refcount--; end: diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index cb6bbc53b5861d..d580f8e9c00120 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -43,7 +43,7 @@ #define RSEQ_INJECT_FAILED #endif -extern __thread volatile struct rseq_abi __rseq_abi; +extern __thread struct rseq_abi __rseq_abi; extern int __rseq_handled; #define rseq_likely(x) __builtin_expect(!!(x), 1) @@ -139,7 +139,7 @@ static inline uint32_t rseq_current_cpu(void) static inline void rseq_clear_rseq_cs(void) { - __rseq_abi.rseq_cs.arch.ptr = 0; + RSEQ_WRITE_ONCE(__rseq_abi.rseq_cs.arch.ptr, 0); } /* From d471088d078b8f57866621c528d3296188f1d10f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 24 Jan 2022 12:12:43 -0500 Subject: [PATCH 0230/1056] selftests/rseq: Introduce rseq_get_abi() helper commit e546cd48ccc456074ddb8920732aef4af65d7ca7 upstream. This is done in preparation for the selftest uplift to become compatible with glibc-2.35. glibc-2.35 exposes the rseq per-thread data in the TCB, accessible at an offset from the thread pointer, rather than through an actual Thread-Local Storage (TLS) variable, as the kernel selftests initially expected. Introduce a rseq_get_abi() helper, initially using the __rseq_abi TLS variable, in preparation for changing this userspace ABI for one which is compatible with glibc-2.35. Note that the __rseq_abi TLS and glibc-2.35's ABI for per-thread data cannot actively coexist in a process, because the kernel supports only a single rseq registration per thread. Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220124171253.22072-6-mathieu.desnoyers@efficios.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/rseq-arm.h | 32 +++++++++++------------ tools/testing/selftests/rseq/rseq-arm64.h | 32 +++++++++++------------ tools/testing/selftests/rseq/rseq-mips.h | 32 +++++++++++------------ tools/testing/selftests/rseq/rseq-ppc.h | 32 +++++++++++------------ tools/testing/selftests/rseq/rseq-s390.h | 24 ++++++++--------- tools/testing/selftests/rseq/rseq-x86.h | 30 ++++++++++----------- tools/testing/selftests/rseq/rseq.h | 11 +++++--- 7 files changed, 99 insertions(+), 94 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h index 5943c816c07ce1..6716540e17c614 100644 --- a/tools/testing/selftests/rseq/rseq-arm.h +++ b/tools/testing/selftests/rseq/rseq-arm.h @@ -185,8 +185,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -255,8 +255,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -316,8 +316,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), [v] "m" (*v), [count] "Ir" (count) RSEQ_INJECT_INPUT @@ -381,8 +381,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -457,8 +457,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -537,8 +537,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -657,8 +657,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -782,8 +782,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* final store input */ [v] "m" (*v), [expect] "r" (expect), diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h index 200dae9e4208c4..b9d9b3aa6e9b42 100644 --- a/tools/testing/selftests/rseq/rseq-arm64.h +++ b/tools/testing/selftests/rseq/rseq-arm64.h @@ -230,8 +230,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), [v] "Qo" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -287,8 +287,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), [v] "Qo" (*v), [expectnot] "r" (expectnot), [load] "Qo" (*load), @@ -337,8 +337,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), [v] "Qo" (*v), [count] "r" (count) RSEQ_INJECT_INPUT @@ -388,8 +388,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -447,8 +447,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -508,8 +508,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), [v] "Qo" (*v), [expect] "r" (expect), [v2] "Qo" (*v2), @@ -569,8 +569,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -629,8 +629,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h index e989e7c14b0972..2b1f5bd9526819 100644 --- a/tools/testing/selftests/rseq/rseq-mips.h +++ b/tools/testing/selftests/rseq/rseq-mips.h @@ -190,8 +190,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -258,8 +258,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -319,8 +319,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), [v] "m" (*v), [count] "Ir" (count) RSEQ_INJECT_INPUT @@ -382,8 +382,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -456,8 +456,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -532,8 +532,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -649,8 +649,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -771,8 +771,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* final store input */ [v] "m" (*v), [expect] "r" (expect), diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h index 76be90196fe4f1..2e6b7572ba0800 100644 --- a/tools/testing/selftests/rseq/rseq-ppc.h +++ b/tools/testing/selftests/rseq/rseq-ppc.h @@ -235,8 +235,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -301,8 +301,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -359,8 +359,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* final store input */ [v] "m" (*v), [count] "r" (count) @@ -419,8 +419,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -489,8 +489,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -560,8 +560,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -635,8 +635,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -711,8 +711,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* final store input */ [v] "m" (*v), [expect] "r" (expect), diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h index 8ef94ad1cbb452..b906e044d2a348 100644 --- a/tools/testing/selftests/rseq/rseq-s390.h +++ b/tools/testing/selftests/rseq/rseq-s390.h @@ -165,8 +165,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -233,8 +233,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -288,8 +288,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* final store input */ [v] "m" (*v), [count] "r" (count) @@ -347,8 +347,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -426,8 +426,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -534,8 +534,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs), /* final store input */ [v] "m" (*v), [expect] "r" (expect), diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index 640411518e4664..1d9fa0516e5365 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -141,7 +141,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_abi] "r" (rseq_get_abi()), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -207,7 +207,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_abi] "r" (rseq_get_abi()), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -258,7 +258,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_abi] "r" (rseq_get_abi()), /* final store input */ [v] "m" (*v), [count] "er" (count) @@ -314,7 +314,7 @@ int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_abi] "r" (rseq_get_abi()), /* final store input */ [ptr] "m" (*ptr), [off] "er" (off), @@ -372,7 +372,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_abi] "r" (rseq_get_abi()), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -449,7 +449,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_abi] "r" (rseq_get_abi()), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -555,7 +555,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_abi] "r" (rseq_get_abi()), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -719,7 +719,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_abi] "r" (rseq_get_abi()), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -785,7 +785,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_abi] "r" (rseq_get_abi()), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -836,7 +836,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_abi] "r" (rseq_get_abi()), /* final store input */ [v] "m" (*v), [count] "ir" (count) @@ -894,7 +894,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_abi] "r" (rseq_get_abi()), /* try store input */ [v2] "m" (*v2), [newv2] "m" (newv2), @@ -962,7 +962,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_abi] "r" (rseq_get_abi()), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -1032,7 +1032,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_abi] "r" (rseq_get_abi()), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -1142,7 +1142,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_abi] "r" (rseq_get_abi()), /* final store input */ [v] "m" (*v), [expect] "m" (expect), @@ -1255,7 +1255,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_abi] "r" (rseq_get_abi()), /* final store input */ [v] "m" (*v), [expect] "m" (expect), diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index d580f8e9c00120..ca668a2af1729f 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -46,6 +46,11 @@ extern __thread struct rseq_abi __rseq_abi; extern int __rseq_handled; +static inline struct rseq_abi *rseq_get_abi(void) +{ + return &__rseq_abi; +} + #define rseq_likely(x) __builtin_expect(!!(x), 1) #define rseq_unlikely(x) __builtin_expect(!!(x), 0) #define rseq_barrier() __asm__ __volatile__("" : : : "memory") @@ -108,7 +113,7 @@ int32_t rseq_fallback_current_cpu(void); */ static inline int32_t rseq_current_cpu_raw(void) { - return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id); + return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id); } /* @@ -124,7 +129,7 @@ static inline int32_t rseq_current_cpu_raw(void) */ static inline uint32_t rseq_cpu_start(void) { - return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start); + return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id_start); } static inline uint32_t rseq_current_cpu(void) @@ -139,7 +144,7 @@ static inline uint32_t rseq_current_cpu(void) static inline void rseq_clear_rseq_cs(void) { - RSEQ_WRITE_ONCE(__rseq_abi.rseq_cs.arch.ptr, 0); + RSEQ_WRITE_ONCE(rseq_get_abi()->rseq_cs.arch.ptr, 0); } /* From e49d1c413d2825d5df94f7ba012f22233f0802cf Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 24 Jan 2022 12:12:44 -0500 Subject: [PATCH 0231/1056] selftests/rseq: Introduce thread pointer getters commit 886ddfba933f5ce9d76c278165d834d114ba4ffc upstream. This is done in preparation for the selftest uplift to become compatible with glibc-2.35. glibc-2.35 exposes the rseq per-thread data in the TCB, accessible at an offset from the thread pointer. The toolchains do not implement accessing the thread pointer on all architectures. Provide thread pointer getters for ppc and x86 which lack (or lacked until recently) toolchain support. Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220124171253.22072-7-mathieu.desnoyers@efficios.com Signed-off-by: Greg Kroah-Hartman --- .../rseq/rseq-generic-thread-pointer.h | 25 ++++++++++++ .../selftests/rseq/rseq-ppc-thread-pointer.h | 30 ++++++++++++++ .../selftests/rseq/rseq-thread-pointer.h | 19 +++++++++ .../selftests/rseq/rseq-x86-thread-pointer.h | 40 +++++++++++++++++++ 4 files changed, 114 insertions(+) create mode 100644 tools/testing/selftests/rseq/rseq-generic-thread-pointer.h create mode 100644 tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h create mode 100644 tools/testing/selftests/rseq/rseq-thread-pointer.h create mode 100644 tools/testing/selftests/rseq/rseq-x86-thread-pointer.h diff --git a/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h new file mode 100644 index 00000000000000..38c58466157142 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-generic-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers + */ + +#ifndef _RSEQ_GENERIC_THREAD_POINTER +#define _RSEQ_GENERIC_THREAD_POINTER + +#ifdef __cplusplus +extern "C" { +#endif + +/* Use gcc builtin thread pointer. */ +static inline void *rseq_thread_pointer(void) +{ + return __builtin_thread_pointer(); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h new file mode 100644 index 00000000000000..263eee84fb7605 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-ppc-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers + */ + +#ifndef _RSEQ_PPC_THREAD_POINTER +#define _RSEQ_PPC_THREAD_POINTER + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void *rseq_thread_pointer(void) +{ +#ifdef __powerpc64__ + register void *__result asm ("r13"); +#else + register void *__result asm ("r2"); +#endif + asm ("" : "=r" (__result)); + return __result; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/testing/selftests/rseq/rseq-thread-pointer.h b/tools/testing/selftests/rseq/rseq-thread-pointer.h new file mode 100644 index 00000000000000..977c25d758b2ab --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-thread-pointer.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers + */ + +#ifndef _RSEQ_THREAD_POINTER +#define _RSEQ_THREAD_POINTER + +#if defined(__x86_64__) || defined(__i386__) +#include "rseq-x86-thread-pointer.h" +#elif defined(__PPC__) +#include "rseq-ppc-thread-pointer.h" +#else +#include "rseq-generic-thread-pointer.h" +#endif + +#endif diff --git a/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h new file mode 100644 index 00000000000000..d3133587d99682 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-x86-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers + */ + +#ifndef _RSEQ_X86_THREAD_POINTER +#define _RSEQ_X86_THREAD_POINTER + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if __GNUC_PREREQ (11, 1) +static inline void *rseq_thread_pointer(void) +{ + return __builtin_thread_pointer(); +} +#else +static inline void *rseq_thread_pointer(void) +{ + void *__result; + +# ifdef __x86_64__ + __asm__ ("mov %%fs:0, %0" : "=r" (__result)); +# else + __asm__ ("mov %%gs:0, %0" : "=r" (__result)); +# endif + return __result; +} +#endif /* !GCC 11 */ + +#ifdef __cplusplus +} +#endif + +#endif From b9a8ebe29636000ed2fdf039dbe0bd186393c150 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 24 Jan 2022 12:12:45 -0500 Subject: [PATCH 0232/1056] selftests/rseq: Uplift rseq selftests for compatibility with glibc-2.35 commit 233e667e1ae3e348686bd9dd0172e62a09d852e1 upstream. glibc-2.35 (upcoming release date 2022-02-01) exposes the rseq per-thread data in the TCB, accessible at an offset from the thread pointer, rather than through an actual Thread-Local Storage (TLS) variable, as the Linux kernel selftests initially expected. The __rseq_abi TLS and glibc-2.35's ABI for per-thread data cannot actively coexist in a process, because the kernel supports only a single rseq registration per thread. Here is the scheme introduced to ensure selftests can work both with an older glibc and with glibc-2.35+: - librseq exposes its own "rseq_offset, rseq_size, rseq_flags" ABI. - librseq queries for glibc rseq ABI (__rseq_offset, __rseq_size, __rseq_flags) using dlsym() in a librseq library constructor. If those are found, copy their values into rseq_offset, rseq_size, and rseq_flags. - Else, if those glibc symbols are not found, handle rseq registration from librseq and use its own IE-model TLS to implement the rseq ABI per-thread storage. Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220124171253.22072-8-mathieu.desnoyers@efficios.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/Makefile | 2 +- tools/testing/selftests/rseq/rseq.c | 161 ++++++++++++-------------- tools/testing/selftests/rseq/rseq.h | 13 ++- 3 files changed, 88 insertions(+), 88 deletions(-) diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index 2af9d39a97168c..215e1067f03763 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -6,7 +6,7 @@ endif CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L$(OUTPUT) -Wl,-rpath=./ \ $(CLANG_FLAGS) -LDLIBS += -lpthread +LDLIBS += -lpthread -ldl # Own dependencies because we only want to build against 1st prerequisite, but # still track changes to header files and depend on shared object. diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 1f905b60728a35..07ba0d463a9674 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -26,130 +26,123 @@ #include #include #include +#include #include "../kselftest.h" #include "rseq.h" -__thread struct rseq_abi __rseq_abi = { - .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, -}; +static const int *libc_rseq_offset_p; +static const unsigned int *libc_rseq_size_p; +static const unsigned int *libc_rseq_flags_p; -/* - * Shared with other libraries. This library may take rseq ownership if it is - * still 0 when executing the library constructor. Set to 1 by library - * constructor when handling rseq. Set to 0 in destructor if handling rseq. - */ -int __rseq_handled; +/* Offset from the thread pointer to the rseq area. */ +int rseq_offset; + +/* Size of the registered rseq area. 0 if the registration was + unsuccessful. */ +unsigned int rseq_size = -1U; + +/* Flags used during rseq registration. */ +unsigned int rseq_flags; -/* Whether this library have ownership of rseq registration. */ static int rseq_ownership; -static __thread volatile uint32_t __rseq_refcount; +static +__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"))) = { + .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, +}; -static void signal_off_save(sigset_t *oldset) +static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, + int flags, uint32_t sig) { - sigset_t set; - int ret; - - sigfillset(&set); - ret = pthread_sigmask(SIG_BLOCK, &set, oldset); - if (ret) - abort(); + return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); } -static void signal_restore(sigset_t oldset) +int rseq_available(void) { - int ret; + int rc; - ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL); - if (ret) + rc = sys_rseq(NULL, 0, 0, 0); + if (rc != -1) abort(); -} - -static int sys_rseq(volatile struct rseq_abi *rseq_abi, uint32_t rseq_len, - int flags, uint32_t sig) -{ - return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); + switch (errno) { + case ENOSYS: + return 0; + case EINVAL: + return 1; + default: + abort(); + } } int rseq_register_current_thread(void) { - int rc, ret = 0; - sigset_t oldset; + int rc; - if (!rseq_ownership) + if (!rseq_ownership) { + /* Treat libc's ownership as a successful registration. */ return 0; - signal_off_save(&oldset); - if (__rseq_refcount == UINT_MAX) { - ret = -1; - goto end; } - if (__rseq_refcount++) - goto end; rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), 0, RSEQ_SIG); - if (!rc) { - assert(rseq_current_cpu_raw() >= 0); - goto end; - } - if (errno != EBUSY) - RSEQ_WRITE_ONCE(__rseq_abi.cpu_id, RSEQ_ABI_CPU_ID_REGISTRATION_FAILED); - ret = -1; - __rseq_refcount--; -end: - signal_restore(oldset); - return ret; + if (rc) + return -1; + assert(rseq_current_cpu_raw() >= 0); + return 0; } int rseq_unregister_current_thread(void) { - int rc, ret = 0; - sigset_t oldset; + int rc; - if (!rseq_ownership) + if (!rseq_ownership) { + /* Treat libc's ownership as a successful unregistration. */ return 0; - signal_off_save(&oldset); - if (!__rseq_refcount) { - ret = -1; - goto end; } - if (--__rseq_refcount) - goto end; - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), - RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); - if (!rc) - goto end; - __rseq_refcount = 1; - ret = -1; -end: - signal_restore(oldset); - return ret; + rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + if (rc) + return -1; + return 0; } -int32_t rseq_fallback_current_cpu(void) +static __attribute__((constructor)) +void rseq_init(void) { - int32_t cpu; - - cpu = sched_getcpu(); - if (cpu < 0) { - perror("sched_getcpu()"); - abort(); + libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); + libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); + libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); + if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p) { + /* rseq registration owned by glibc */ + rseq_offset = *libc_rseq_offset_p; + rseq_size = *libc_rseq_size_p; + rseq_flags = *libc_rseq_flags_p; + return; } - return cpu; -} - -void __attribute__((constructor)) rseq_init(void) -{ - /* Check whether rseq is handled by another library. */ - if (__rseq_handled) + if (!rseq_available()) return; - __rseq_handled = 1; rseq_ownership = 1; + rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); + rseq_size = sizeof(struct rseq_abi); + rseq_flags = 0; } -void __attribute__((destructor)) rseq_fini(void) +static __attribute__((destructor)) +void rseq_exit(void) { if (!rseq_ownership) return; - __rseq_handled = 0; + rseq_offset = 0; + rseq_size = -1U; rseq_ownership = 0; } + +int32_t rseq_fallback_current_cpu(void) +{ + int32_t cpu; + + cpu = sched_getcpu(); + if (cpu < 0) { + perror("sched_getcpu()"); + abort(); + } + return cpu; +} diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index ca668a2af1729f..17531ccd309065 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -43,12 +43,19 @@ #define RSEQ_INJECT_FAILED #endif -extern __thread struct rseq_abi __rseq_abi; -extern int __rseq_handled; +#include "rseq-thread-pointer.h" + +/* Offset from the thread pointer to the rseq area. */ +extern int rseq_offset; +/* Size of the registered rseq area. 0 if the registration was + unsuccessful. */ +extern unsigned int rseq_size; +/* Flags used during rseq registration. */ +extern unsigned int rseq_flags; static inline struct rseq_abi *rseq_get_abi(void) { - return &__rseq_abi; + return (struct rseq_abi *) ((uintptr_t) rseq_thread_pointer() + rseq_offset); } #define rseq_likely(x) __builtin_expect(!!(x), 1) From 1969c5eff9647e2a6eecb8b7844a6fa39791b3d8 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 24 Jan 2022 12:12:46 -0500 Subject: [PATCH 0233/1056] selftests/rseq: Fix ppc32: wrong rseq_cs 32-bit field pointer on big endian commit 24d1136a29da5953de5c0cbc6c83eb62a1e0bf14 upstream. ppc32 incorrectly uses padding as rseq_cs pointer field. Fix this by using the rseq_cs.arch.ptr field. Use this field across all architectures. Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220124171253.22072-9-mathieu.desnoyers@efficios.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/rseq-arm.h | 16 ++++++++-------- tools/testing/selftests/rseq/rseq-arm64.h | 16 ++++++++-------- tools/testing/selftests/rseq/rseq-mips.h | 16 ++++++++-------- tools/testing/selftests/rseq/rseq-ppc.h | 16 ++++++++-------- tools/testing/selftests/rseq/rseq-s390.h | 12 ++++++------ 5 files changed, 38 insertions(+), 38 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h index 6716540e17c614..5f567b3b40f2a3 100644 --- a/tools/testing/selftests/rseq/rseq-arm.h +++ b/tools/testing/selftests/rseq/rseq-arm.h @@ -186,7 +186,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -256,7 +256,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -317,7 +317,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [count] "Ir" (count) RSEQ_INJECT_INPUT @@ -382,7 +382,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -458,7 +458,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -538,7 +538,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -658,7 +658,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -783,7 +783,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h index b9d9b3aa6e9b42..d0f2b7feee9413 100644 --- a/tools/testing/selftests/rseq/rseq-arm64.h +++ b/tools/testing/selftests/rseq/rseq-arm64.h @@ -231,7 +231,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -288,7 +288,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [expectnot] "r" (expectnot), [load] "Qo" (*load), @@ -338,7 +338,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [count] "r" (count) RSEQ_INJECT_INPUT @@ -389,7 +389,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -448,7 +448,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -509,7 +509,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [expect] "r" (expect), [v2] "Qo" (*v2), @@ -570,7 +570,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -630,7 +630,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h index 2b1f5bd9526819..6df54273825df9 100644 --- a/tools/testing/selftests/rseq/rseq-mips.h +++ b/tools/testing/selftests/rseq/rseq-mips.h @@ -191,7 +191,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -259,7 +259,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -320,7 +320,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [count] "Ir" (count) RSEQ_INJECT_INPUT @@ -383,7 +383,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -457,7 +457,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -533,7 +533,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -650,7 +650,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -772,7 +772,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h index 2e6b7572ba0800..c4ba1375285d2b 100644 --- a/tools/testing/selftests/rseq/rseq-ppc.h +++ b/tools/testing/selftests/rseq/rseq-ppc.h @@ -236,7 +236,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -302,7 +302,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -360,7 +360,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [count] "r" (count) @@ -420,7 +420,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -490,7 +490,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -561,7 +561,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -636,7 +636,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -712,7 +712,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h index b906e044d2a348..9927021f8bd0cf 100644 --- a/tools/testing/selftests/rseq/rseq-s390.h +++ b/tools/testing/selftests/rseq/rseq-s390.h @@ -166,7 +166,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -234,7 +234,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -289,7 +289,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [count] "r" (count) @@ -348,7 +348,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -427,7 +427,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -535,7 +535,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), From 56cbd6e40e41efbf9b2f40ddd4e97399700ad6b5 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 24 Jan 2022 12:12:47 -0500 Subject: [PATCH 0234/1056] selftests/rseq: Fix ppc32 missing instruction selection "u" and "x" for load/store commit de6b52a21420a18dc8a36438d581efd1313d5fe3 upstream. Building the rseq basic test with gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.12) Target: powerpc-linux-gnu leads to these errors: /tmp/ccieEWxU.s: Assembler messages: /tmp/ccieEWxU.s:118: Error: syntax error; found `,', expected `(' /tmp/ccieEWxU.s:118: Error: junk at end of line: `,8' /tmp/ccieEWxU.s:121: Error: syntax error; found `,', expected `(' /tmp/ccieEWxU.s:121: Error: junk at end of line: `,8' /tmp/ccieEWxU.s:626: Error: syntax error; found `,', expected `(' /tmp/ccieEWxU.s:626: Error: junk at end of line: `,8' /tmp/ccieEWxU.s:629: Error: syntax error; found `,', expected `(' /tmp/ccieEWxU.s:629: Error: junk at end of line: `,8' /tmp/ccieEWxU.s:735: Error: syntax error; found `,', expected `(' /tmp/ccieEWxU.s:735: Error: junk at end of line: `,8' /tmp/ccieEWxU.s:738: Error: syntax error; found `,', expected `(' /tmp/ccieEWxU.s:738: Error: junk at end of line: `,8' /tmp/ccieEWxU.s:741: Error: syntax error; found `,', expected `(' /tmp/ccieEWxU.s:741: Error: junk at end of line: `,8' Makefile:581: recipe for target 'basic_percpu_ops_test.o' failed Based on discussion with Linux powerpc maintainers and review of the use of the "m" operand in powerpc kernel code, add the missing %Un%Xn (where n is operand number) to the lwz, stw, ld, and std instructions when used with "m" operands. Using "WORD" to mean either a 32-bit or 64-bit type depending on the architecture is misleading. The term "WORD" really means a 32-bit type in both 32-bit and 64-bit powerpc assembler. The intent here is to wrap load/store to intptr_t into common macros for both 32-bit and 64-bit. Rename the macros with a RSEQ_ prefix, and use the terms "INT" for always 32-bit type, and "LONG" for architecture bitness-sized type. Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220124171253.22072-10-mathieu.desnoyers@efficios.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/rseq-ppc.h | 55 +++++++++++++------------ 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h index c4ba1375285d2b..87befda47ba47c 100644 --- a/tools/testing/selftests/rseq/rseq-ppc.h +++ b/tools/testing/selftests/rseq/rseq-ppc.h @@ -47,10 +47,13 @@ do { \ #ifdef __PPC64__ -#define STORE_WORD "std " -#define LOAD_WORD "ld " -#define LOADX_WORD "ldx " -#define CMP_WORD "cmpd " +#define RSEQ_STORE_LONG(arg) "std%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */ +#define RSEQ_STORE_INT(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */ +#define RSEQ_LOAD_LONG(arg) "ld%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */ +#define RSEQ_LOAD_INT(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */ +#define RSEQ_LOADX_LONG "ldx " /* From base register ("b" constraint) */ +#define RSEQ_CMP_LONG "cmpd " +#define RSEQ_CMP_LONG_INT "cmpdi " #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ start_ip, post_commit_offset, abort_ip) \ @@ -89,10 +92,13 @@ do { \ #else /* #ifdef __PPC64__ */ -#define STORE_WORD "stw " -#define LOAD_WORD "lwz " -#define LOADX_WORD "lwzx " -#define CMP_WORD "cmpw " +#define RSEQ_STORE_LONG(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */ +#define RSEQ_STORE_INT(arg) RSEQ_STORE_LONG(arg) /* To memory ("m" constraint) */ +#define RSEQ_LOAD_LONG(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */ +#define RSEQ_LOAD_INT(arg) RSEQ_LOAD_LONG(arg) /* From memory ("m" constraint) */ +#define RSEQ_LOADX_LONG "lwzx " /* From base register ("b" constraint) */ +#define RSEQ_CMP_LONG "cmpw " +#define RSEQ_CMP_LONG_INT "cmpwi " #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ start_ip, post_commit_offset, abort_ip) \ @@ -125,7 +131,7 @@ do { \ RSEQ_INJECT_ASM(1) \ "lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t" \ "addi %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \ - "stw %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \ + RSEQ_STORE_INT(rseq_cs) "%%r17, %[" __rseq_str(rseq_cs) "]\n\t" \ __rseq_str(label) ":\n\t" #endif /* #ifdef __PPC64__ */ @@ -136,7 +142,7 @@ do { \ #define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ RSEQ_INJECT_ASM(2) \ - "lwz %%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \ + RSEQ_LOAD_INT(current_cpu_id) "%%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \ "cmpw cr7, %[" __rseq_str(cpu_id) "], %%r17\n\t" \ "bne- cr7, " __rseq_str(label) "\n\t" @@ -153,25 +159,25 @@ do { \ * RSEQ_ASM_OP_* (else): doesn't have hard-code registers(unless cr7) */ #define RSEQ_ASM_OP_CMPEQ(var, expect, label) \ - LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \ - CMP_WORD "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \ + RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \ + RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \ "bne- cr7, " __rseq_str(label) "\n\t" #define RSEQ_ASM_OP_CMPNE(var, expectnot, label) \ - LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \ - CMP_WORD "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \ + RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \ + RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \ "beq- cr7, " __rseq_str(label) "\n\t" #define RSEQ_ASM_OP_STORE(value, var) \ - STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" + RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" /* Load @var to r17 */ #define RSEQ_ASM_OP_R_LOAD(var) \ - LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" + RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" /* Store r17 to @var */ #define RSEQ_ASM_OP_R_STORE(var) \ - STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" + RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" /* Add @count to r17 */ #define RSEQ_ASM_OP_R_ADD(count) \ @@ -179,11 +185,11 @@ do { \ /* Load (r17 + voffp) to r17 */ #define RSEQ_ASM_OP_R_LOADX(voffp) \ - LOADX_WORD "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t" + RSEQ_LOADX_LONG "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t" /* TODO: implement a faster memcpy. */ #define RSEQ_ASM_OP_R_MEMCPY() \ - "cmpdi %%r19, 0\n\t" \ + RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \ "beq 333f\n\t" \ "addi %%r20, %%r20, -1\n\t" \ "addi %%r21, %%r21, -1\n\t" \ @@ -191,16 +197,16 @@ do { \ "lbzu %%r18, 1(%%r20)\n\t" \ "stbu %%r18, 1(%%r21)\n\t" \ "addi %%r19, %%r19, -1\n\t" \ - "cmpdi %%r19, 0\n\t" \ + RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \ "bne 222b\n\t" \ "333:\n\t" \ #define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \ - STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \ + RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \ __rseq_str(post_commit_label) ":\n\t" #define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \ - STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \ + RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \ __rseq_str(post_commit_label) ":\n\t" static inline __attribute__((always_inline)) @@ -743,9 +749,4 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, #endif } -#undef STORE_WORD -#undef LOAD_WORD -#undef LOADX_WORD -#undef CMP_WORD - #endif /* !RSEQ_SKIP_FASTPATH */ From 21199d90428e52f93b8f615a8ba23dc28a97e110 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 24 Jan 2022 12:12:48 -0500 Subject: [PATCH 0235/1056] selftests/rseq: Fix ppc32 offsets by using long rather than off_t commit 26dc8a6d8e11552f3b797b5aafe01071ca32d692 upstream. The semantic of off_t is for file offsets. We mean to use it as an offset from a pointer. We really expect it to fit in a single register, and not use a 64-bit type on 32-bit architectures. Fix runtime issues on ppc32 where the offset is always 0 due to inconsistency between the argument type (off_t -> 64-bit) and type expected by the inline assembler (32-bit). Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220124171253.22072-11-mathieu.desnoyers@efficios.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/basic_percpu_ops_test.c | 2 +- tools/testing/selftests/rseq/param_test.c | 2 +- tools/testing/selftests/rseq/rseq-arm.h | 2 +- tools/testing/selftests/rseq/rseq-arm64.h | 2 +- tools/testing/selftests/rseq/rseq-mips.h | 2 +- tools/testing/selftests/rseq/rseq-ppc.h | 2 +- tools/testing/selftests/rseq/rseq-s390.h | 2 +- tools/testing/selftests/rseq/rseq-skip.h | 2 +- tools/testing/selftests/rseq/rseq-x86.h | 6 +++--- 9 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c index b953a52ff706c8..517756afc2a4e5 100644 --- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c +++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c @@ -167,7 +167,7 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, for (;;) { struct percpu_list_node *head; intptr_t *targetptr, expectnot, *load; - off_t offset; + long offset; int ret, cpu; cpu = rseq_cpu_start(); diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index cc2cfc1da9383f..335c290b39e7cd 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -549,7 +549,7 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, for (;;) { struct percpu_list_node *head; intptr_t *targetptr, expectnot, *load; - off_t offset; + long offset; int ret; cpu = rseq_cpu_start(); diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h index 5f567b3b40f2a3..ae476af701528e 100644 --- a/tools/testing/selftests/rseq/rseq-arm.h +++ b/tools/testing/selftests/rseq/rseq-arm.h @@ -217,7 +217,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h index d0f2b7feee9413..7806817062c2e6 100644 --- a/tools/testing/selftests/rseq/rseq-arm64.h +++ b/tools/testing/selftests/rseq/rseq-arm64.h @@ -259,7 +259,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h index 6df54273825df9..0d1d9255da700f 100644 --- a/tools/testing/selftests/rseq/rseq-mips.h +++ b/tools/testing/selftests/rseq/rseq-mips.h @@ -222,7 +222,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h index 87befda47ba47c..aa18c0eabf40ab 100644 --- a/tools/testing/selftests/rseq/rseq-ppc.h +++ b/tools/testing/selftests/rseq/rseq-ppc.h @@ -270,7 +270,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h index 9927021f8bd0cf..0f523b3ecdef8c 100644 --- a/tools/testing/selftests/rseq/rseq-s390.h +++ b/tools/testing/selftests/rseq/rseq-s390.h @@ -198,7 +198,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) */ static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) diff --git a/tools/testing/selftests/rseq/rseq-skip.h b/tools/testing/selftests/rseq/rseq-skip.h index 72750b5905a966..7b53dac1fcdd9c 100644 --- a/tools/testing/selftests/rseq/rseq-skip.h +++ b/tools/testing/selftests/rseq/rseq-skip.h @@ -13,7 +13,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { return -1; } diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index 1d9fa0516e5365..0ee6c041d4be98 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -172,7 +172,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) */ static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -286,7 +286,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) * *pval += inc; */ static inline __attribute__((always_inline)) -int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) +int rseq_offset_deref_addv(intptr_t *ptr, long off, intptr_t inc, int cpu) { RSEQ_INJECT_C(9) @@ -750,7 +750,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) */ static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) From 33307f2afd8579ccb41ef77dfcab7e148e82e5fd Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 24 Jan 2022 12:12:49 -0500 Subject: [PATCH 0236/1056] selftests/rseq: Fix warnings about #if checks of undefined tokens commit d7ed99ade3e62b755584eea07b4e499e79240527 upstream. Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220124171253.22072-12-mathieu.desnoyers@efficios.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/param_test.c | 2 +- tools/testing/selftests/rseq/rseq-x86.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 335c290b39e7cd..da23c22d58820f 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -161,7 +161,7 @@ unsigned int yield_mod_cnt, nr_abort; " cbnz " INJECT_ASM_REG ", 222b\n" \ "333:\n" -#elif __PPC__ +#elif defined(__PPC__) #define RSEQ_INJECT_INPUT \ , [loop_cnt_1]"m"(loop_cnt[1]) \ diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index 0ee6c041d4be98..e444563c699958 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -600,7 +600,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, #endif /* !RSEQ_SKIP_FASTPATH */ -#elif __i386__ +#elif defined(__i386__) #define rseq_smp_mb() \ __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc") From 786bd3511934157f88e141d0a8494fcfb02b02f6 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 24 Jan 2022 12:12:50 -0500 Subject: [PATCH 0237/1056] selftests/rseq: Remove arm/mips asm goto compiler work-around commit 94c5cf2a0e193afffef8de48ddc42de6df7cac93 upstream. The arm and mips work-around for asm goto size guess issues are not properly documented, and lack reference to specific compiler versions, upstream compiler bug tracker entry, and reproducer. I can only find a loosely documented patch in my original LKML rseq post refering to gcc < 7 on ARM, but it does not appear to be sufficient to track the exact issue. Also, I am not sure MIPS really has the same limitation. Therefore, remove the work-around until we can properly document this. Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/lkml/20171121141900.18471-17-mathieu.desnoyers@efficios.com/ Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/rseq-arm.h | 37 ------------------------ tools/testing/selftests/rseq/rseq-mips.h | 37 ------------------------ 2 files changed, 74 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h index ae476af701528e..64c3a622107b1e 100644 --- a/tools/testing/selftests/rseq/rseq-arm.h +++ b/tools/testing/selftests/rseq/rseq-arm.h @@ -147,14 +147,11 @@ do { \ teardown \ "b %l[" __rseq_str(cmpfail_label) "]\n\t" -#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("") - static inline __attribute__((always_inline)) int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -198,14 +195,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -221,7 +215,6 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -270,14 +263,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -292,7 +282,6 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ #ifdef RSEQ_COMPARE_TWICE @@ -328,10 +317,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE @@ -347,7 +334,6 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -398,14 +384,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -422,7 +405,6 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -474,14 +456,11 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -498,7 +477,6 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -554,14 +532,11 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -582,7 +557,6 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -678,21 +652,16 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("expected value comparison failed"); #endif } @@ -706,7 +675,6 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -803,21 +771,16 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h index 0d1d9255da700f..878739fae2fdeb 100644 --- a/tools/testing/selftests/rseq/rseq-mips.h +++ b/tools/testing/selftests/rseq/rseq-mips.h @@ -154,14 +154,11 @@ do { \ teardown \ "b %l[" __rseq_str(cmpfail_label) "]\n\t" -#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("") - static inline __attribute__((always_inline)) int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -203,14 +200,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -226,7 +220,6 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -273,14 +266,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -295,7 +285,6 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ #ifdef RSEQ_COMPARE_TWICE @@ -331,10 +320,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE @@ -350,7 +337,6 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -399,14 +385,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -423,7 +406,6 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -473,14 +455,11 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -497,7 +476,6 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -549,14 +527,11 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -577,7 +552,6 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -670,21 +644,16 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("expected value comparison failed"); #endif } @@ -698,7 +667,6 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -792,21 +760,16 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("expected value comparison failed"); #endif } From 4633aa6fadc68396637b464f01e9ab98249ca2ba Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 24 Jan 2022 12:12:51 -0500 Subject: [PATCH 0238/1056] selftests/rseq: Fix: work-around asm goto compiler bugs commit b53823fb2ef854222853be164f3b1e815f315144 upstream. gcc and clang each have their own compiler bugs with respect to asm goto. Implement a work-around for compiler versions known to have those bugs. gcc prior to 4.8.2 miscompiles asm goto. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 gcc prior to 8.1.0 miscompiles asm goto at O1. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103908 clang prior to version 13.0.1 miscompiles asm goto at O2. https://github.com/llvm/llvm-project/issues/52735 Work around these issues by adding a volatile inline asm with memory clobber in the fallthrough after the asm goto and at each label target. Emit this for all compilers in case other similar issues are found in the future. Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220124171253.22072-14-mathieu.desnoyers@efficios.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/compiler.h | 30 ++++++++++ tools/testing/selftests/rseq/rseq-arm.h | 39 +++++++++++++ tools/testing/selftests/rseq/rseq-arm64.h | 45 +++++++++++++-- tools/testing/selftests/rseq/rseq-ppc.h | 39 +++++++++++++ tools/testing/selftests/rseq/rseq-s390.h | 29 ++++++++++ tools/testing/selftests/rseq/rseq-x86.h | 68 +++++++++++++++++++++++ tools/testing/selftests/rseq/rseq.h | 1 + 7 files changed, 245 insertions(+), 6 deletions(-) create mode 100644 tools/testing/selftests/rseq/compiler.h diff --git a/tools/testing/selftests/rseq/compiler.h b/tools/testing/selftests/rseq/compiler.h new file mode 100644 index 00000000000000..876eb6a7f75bee --- /dev/null +++ b/tools/testing/selftests/rseq/compiler.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq/compiler.h + * + * Work-around asm goto compiler bugs. + * + * (C) Copyright 2021 - Mathieu Desnoyers + */ + +#ifndef RSEQ_COMPILER_H +#define RSEQ_COMPILER_H + +/* + * gcc prior to 4.8.2 miscompiles asm goto. + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 + * + * gcc prior to 8.1.0 miscompiles asm goto at O1. + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103908 + * + * clang prior to version 13.0.1 miscompiles asm goto at O2. + * https://github.com/llvm/llvm-project/issues/52735 + * + * Work around these issues by adding a volatile inline asm with + * memory clobber in the fallthrough after the asm goto and at each + * label target. Emit this for all compilers in case other similar + * issues are found in the future. + */ +#define rseq_after_asm_goto() asm volatile ("" : : : "memory") + +#endif /* RSEQ_COMPILER_H_ */ diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h index 64c3a622107b1e..893a11eca9d517 100644 --- a/tools/testing/selftests/rseq/rseq-arm.h +++ b/tools/testing/selftests/rseq/rseq-arm.h @@ -195,16 +195,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -263,16 +268,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -317,12 +327,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -384,16 +397,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -456,16 +474,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -532,18 +555,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -652,16 +681,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -771,16 +805,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h index 7806817062c2e6..cbe190a4d0056b 100644 --- a/tools/testing/selftests/rseq/rseq-arm64.h +++ b/tools/testing/selftests/rseq/rseq-arm64.h @@ -242,17 +242,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -300,16 +304,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -348,12 +357,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -402,17 +414,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -461,17 +477,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -522,19 +542,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -584,17 +609,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -644,17 +673,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h index aa18c0eabf40ab..bab8e0b9fb1154 100644 --- a/tools/testing/selftests/rseq/rseq-ppc.h +++ b/tools/testing/selftests/rseq/rseq-ppc.h @@ -254,16 +254,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -322,16 +327,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -378,12 +388,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -442,16 +455,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -512,16 +530,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -583,18 +606,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -659,16 +688,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -735,16 +769,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h index 0f523b3ecdef8c..4e6dc5f0cb4290 100644 --- a/tools/testing/selftests/rseq/rseq-s390.h +++ b/tools/testing/selftests/rseq/rseq-s390.h @@ -178,16 +178,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -248,16 +253,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -301,12 +311,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -364,16 +377,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -443,18 +461,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -555,16 +579,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index e444563c699958..4ab2e74811abb6 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -152,16 +152,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -220,16 +225,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -269,12 +279,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -387,16 +400,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -464,18 +482,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -574,16 +598,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -730,16 +759,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -798,16 +832,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -847,12 +886,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -909,16 +951,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -977,16 +1024,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif @@ -1047,18 +1099,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -1161,16 +1219,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -1274,16 +1337,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index 17531ccd309065..6bd0ac466b4a3e 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -17,6 +17,7 @@ #include #include #include "rseq-abi.h" +#include "compiler.h" /* * Empty code injection macros, override when testing. From f5a656b4ab48bb3b4d17dc603d4f3d80dbb61fd5 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 24 Jan 2022 12:12:52 -0500 Subject: [PATCH 0239/1056] selftests/rseq: x86-64: use %fs segment selector for accessing rseq thread area commit 4e15bb766b6c6e963a4d33629034d0ec3b7637df upstream. Rather than use rseq_get_abi() and pass its result through a register to the inline assembler, directly access the per-thread rseq area through a memory reference combining the %fs segment selector, the constant offset of the field in struct rseq, and the rseq_offset value (in a register). Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220124171253.22072-15-mathieu.desnoyers@efficios.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/rseq-x86.h | 58 +++++++++++++------------ 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index 4ab2e74811abb6..29769664edaaaa 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -28,6 +28,8 @@ #ifdef __x86_64__ +#define RSEQ_ASM_TP_SEGMENT %%fs + #define rseq_smp_mb() \ __asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc") #define rseq_smp_rmb() rseq_barrier() @@ -123,14 +125,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpq %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -141,7 +143,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (rseq_get_abi()), + [rseq_offset] "r" ((long)rseq_offset), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -189,15 +191,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movq %[v], %%rbx\n\t" "cmpq %%rbx, %[expectnot]\n\t" "je %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "movq %[v], %%rbx\n\t" "cmpq %%rbx, %[expectnot]\n\t" "je %l[error2]\n\t" @@ -212,7 +214,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (rseq_get_abi()), + [rseq_offset] "r" ((long)rseq_offset), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -255,11 +257,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) #endif /* final store */ "addq %[count], %[v]\n\t" @@ -268,7 +270,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (rseq_get_abi()), + [rseq_offset] "r" ((long)rseq_offset), /* final store input */ [v] "m" (*v), [count] "er" (count) @@ -309,11 +311,11 @@ int rseq_offset_deref_addv(intptr_t *ptr, long off, intptr_t inc, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) #endif /* get p+v */ "movq %[ptr], %%rbx\n\t" @@ -327,7 +329,7 @@ int rseq_offset_deref_addv(intptr_t *ptr, long off, intptr_t inc, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (rseq_get_abi()), + [rseq_offset] "r" ((long)rseq_offset), /* final store input */ [ptr] "m" (*ptr), [off] "er" (off), @@ -364,14 +366,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpq %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -385,7 +387,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (rseq_get_abi()), + [rseq_offset] "r" ((long)rseq_offset), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -444,8 +446,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" @@ -454,7 +456,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(5) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpq %[v], %[expect]\n\t" "jnz %l[error2]\n\t" "cmpq %[v2], %[expect2]\n\t" @@ -467,7 +469,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (rseq_get_abi()), + [rseq_offset] "r" ((long)rseq_offset), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -524,14 +526,14 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "movq %[dst], %[rseq_scratch1]\n\t" "movq %[len], %[rseq_scratch2]\n\t" /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz 5f\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f) "cmpq %[v], %[expect]\n\t" "jnz 7f\n\t" #endif @@ -579,7 +581,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (rseq_get_abi()), + [rseq_offset] "r" ((long)rseq_offset), /* final store input */ [v] "m" (*v), [expect] "r" (expect), From df2e933a53481558477d2c5dd3fb88a5f2ce25c6 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 24 Jan 2022 12:12:53 -0500 Subject: [PATCH 0240/1056] selftests/rseq: x86-32: use %gs segment selector for accessing rseq thread area commit 127b6429d235ab7c358223bbfd8a8b8d8cc799b6 upstream. Rather than use rseq_get_abi() and pass its result through a register to the inline assembler, directly access the per-thread rseq area through a memory reference combining the %gs segment selector, the constant offset of the field in struct rseq, and the rseq_offset value (in a register). Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220124171253.22072-16-mathieu.desnoyers@efficios.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/rseq-x86.h | 66 +++++++++++++------------ 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index 29769664edaaaa..f704d36643272d 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -633,6 +633,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, #elif defined(__i386__) +#define RSEQ_ASM_TP_SEGMENT %%gs + #define rseq_smp_mb() \ __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc") #define rseq_smp_rmb() \ @@ -732,14 +734,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpl %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpl %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -750,7 +752,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (rseq_get_abi()), + [rseq_offset] "r" (rseq_offset), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -798,15 +800,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[v], %%ebx\n\t" "cmpl %%ebx, %[expectnot]\n\t" "je %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "movl %[v], %%ebx\n\t" "cmpl %%ebx, %[expectnot]\n\t" "je %l[error2]\n\t" @@ -821,7 +823,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (rseq_get_abi()), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -864,11 +866,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) #endif /* final store */ "addl %[count], %[v]\n\t" @@ -877,7 +879,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (rseq_get_abi()), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [count] "ir" (count) @@ -916,14 +918,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpl %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpl %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -938,7 +940,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (rseq_get_abi()), + [rseq_offset] "r" (rseq_offset), /* try store input */ [v2] "m" (*v2), [newv2] "m" (newv2), @@ -987,15 +989,15 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[expect], %%eax\n\t" "cmpl %[v], %%eax\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "movl %[expect], %%eax\n\t" "cmpl %[v], %%eax\n\t" "jnz %l[error2]\n\t" @@ -1011,7 +1013,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (rseq_get_abi()), + [rseq_offset] "r" (rseq_offset), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -1062,8 +1064,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpl %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" @@ -1072,7 +1074,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(5) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpl %[v], %[expect]\n\t" "jnz %l[error2]\n\t" "cmpl %[expect2], %[v2]\n\t" @@ -1086,7 +1088,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (rseq_get_abi()), + [rseq_offset] "r" (rseq_offset), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -1144,15 +1146,15 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "movl %[dst], %[rseq_scratch1]\n\t" "movl %[len], %[rseq_scratch2]\n\t" /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 5f\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 7f\n\t" @@ -1202,7 +1204,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (rseq_get_abi()), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expect] "m" (expect), @@ -1261,15 +1263,15 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, "movl %[dst], %[rseq_scratch1]\n\t" "movl %[len], %[rseq_scratch2]\n\t" /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 5f\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 7f\n\t" @@ -1320,7 +1322,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (rseq_get_abi()), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expect] "m" (expect), From 472863c7b5239ac6723e82c11bf89007e0f4d885 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 3 Feb 2022 10:05:32 -0500 Subject: [PATCH 0241/1056] selftests/rseq: Change type of rseq_offset to ptrdiff_t commit 889c5d60fbcf332c8b6ab7054d45f2768914a375 upstream. Just before the 2.35 release of glibc, the __rseq_offset userspace ABI was changed from int to ptrdiff_t. Adapt to this change in the kernel selftests. Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://sourceware.org/pipermail/libc-alpha/2022-February/136024.html Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/rseq-x86.h | 14 +++++++------- tools/testing/selftests/rseq/rseq.c | 5 +++-- tools/testing/selftests/rseq/rseq.h | 3 ++- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index f704d36643272d..bd01dc41ca1306 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -143,7 +143,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_offset] "r" ((long)rseq_offset), + [rseq_offset] "r" (rseq_offset), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -214,7 +214,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_offset] "r" ((long)rseq_offset), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -270,7 +270,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_offset] "r" ((long)rseq_offset), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [count] "er" (count) @@ -329,7 +329,7 @@ int rseq_offset_deref_addv(intptr_t *ptr, long off, intptr_t inc, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_offset] "r" ((long)rseq_offset), + [rseq_offset] "r" (rseq_offset), /* final store input */ [ptr] "m" (*ptr), [off] "er" (off), @@ -387,7 +387,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_offset] "r" ((long)rseq_offset), + [rseq_offset] "r" (rseq_offset), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -469,7 +469,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_offset] "r" ((long)rseq_offset), + [rseq_offset] "r" (rseq_offset), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -581,7 +581,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_offset] "r" ((long)rseq_offset), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expect] "r" (expect), diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 07ba0d463a9674..986b9458efb264 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -27,16 +27,17 @@ #include #include #include +#include #include "../kselftest.h" #include "rseq.h" -static const int *libc_rseq_offset_p; +static const ptrdiff_t *libc_rseq_offset_p; static const unsigned int *libc_rseq_size_p; static const unsigned int *libc_rseq_flags_p; /* Offset from the thread pointer to the rseq area. */ -int rseq_offset; +ptrdiff_t rseq_offset; /* Size of the registered rseq area. 0 if the registration was unsuccessful. */ diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index 6bd0ac466b4a3e..9d850b290c2e6f 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -16,6 +16,7 @@ #include #include #include +#include #include "rseq-abi.h" #include "compiler.h" @@ -47,7 +48,7 @@ #include "rseq-thread-pointer.h" /* Offset from the thread pointer to the rseq area. */ -extern int rseq_offset; +extern ptrdiff_t rseq_offset; /* Size of the registered rseq area. 0 if the registration was unsuccessful. */ extern unsigned int rseq_size; From 7ed65a4ad8fa9f40bc3979b32c54243d6a684ec9 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 30 Mar 2022 09:03:48 +0200 Subject: [PATCH 0242/1056] xen/blkfront: fix leaking data in shared pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2f446ffe9d737e9a844b97887919c4fda18246e7 upstream. When allocating pages to be used for shared communication with the backend always zero them, this avoids leaking unintended data present on the pages. This is CVE-2022-26365, part of XSA-403. Signed-off-by: Roger Pau Monné Reviewed-by: Jan Beulich Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/block/xen-blkfront.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index d7a9bf43fb32ee..317b0b0994051b 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -312,7 +312,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num) goto out_of_memory; if (info->feature_persistent) { - granted_page = alloc_page(GFP_NOIO); + granted_page = alloc_page(GFP_NOIO | __GFP_ZERO); if (!granted_page) { kfree(gnt_list_entry); goto out_of_memory; @@ -1692,7 +1692,7 @@ static int setup_blkring(struct xenbus_device *dev, for (i = 0; i < info->nr_ring_pages; i++) rinfo->ring_ref[i] = GRANT_INVALID_REF; - sring = alloc_pages_exact(ring_size, GFP_NOIO); + sring = alloc_pages_exact(ring_size, GFP_NOIO | __GFP_ZERO); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; @@ -2209,7 +2209,8 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) BUG_ON(!list_empty(&rinfo->indirect_pages)); for (i = 0; i < num; i++) { - struct page *indirect_page = alloc_page(GFP_KERNEL); + struct page *indirect_page = alloc_page(GFP_KERNEL | + __GFP_ZERO); if (!indirect_page) goto out_of_memory; list_add(&indirect_page->lru, &rinfo->indirect_pages); From 5dd0993c36832d33820238fc8dc741ba801b7961 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 6 Apr 2022 17:38:04 +0200 Subject: [PATCH 0243/1056] xen/netfront: fix leaking data in shared pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 307c8de2b02344805ebead3440d8feed28f2f010 upstream. When allocating pages to be used for shared communication with the backend always zero them, this avoids leaking unintended data present on the pages. This is CVE-2022-33740, part of XSA-403. Signed-off-by: Roger Pau Monné Reviewed-by: Jan Beulich Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 2492a27467b46c..cae8537115a891 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -273,7 +273,8 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue) if (unlikely(!skb)) return NULL; - page = page_pool_dev_alloc_pages(queue->page_pool); + page = page_pool_alloc_pages(queue->page_pool, + GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO); if (unlikely(!page)) { kfree_skb(skb); return NULL; From ed3cfc690675d852c3416aedb271e0e7d179bf49 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Thu, 7 Apr 2022 12:20:06 +0200 Subject: [PATCH 0244/1056] xen/netfront: force data bouncing when backend is untrusted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4491001c2e0fa69efbb748c96ec96b100a5cdb7e upstream. Bounce all data on the skbs to be transmitted into zeroed pages if the backend is untrusted. This avoids leaking data present in the pages shared with the backend but not part of the skb fragments. This requires introducing a new helper in order to allocate skbs with a size multiple of XEN_PAGE_SIZE so we don't leak contiguous data on the granted pages. Reporting whether the backend is to be trusted can be done using a module parameter, or from the xenstore frontend path as set by the toolstack when adding the device. This is CVE-2022-33741, part of XSA-403. Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 49 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index cae8537115a891..409ee9d97af48b 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -66,6 +66,10 @@ module_param_named(max_queues, xennet_max_queues, uint, 0644); MODULE_PARM_DESC(max_queues, "Maximum number of queues per virtual interface"); +static bool __read_mostly xennet_trusted = true; +module_param_named(trusted, xennet_trusted, bool, 0644); +MODULE_PARM_DESC(trusted, "Is the backend trusted"); + #define XENNET_TIMEOUT (5 * HZ) static const struct ethtool_ops xennet_ethtool_ops; @@ -175,6 +179,9 @@ struct netfront_info { /* Is device behaving sane? */ bool broken; + /* Should skbs be bounced into a zeroed buffer? */ + bool bounce; + atomic_t rx_gso_checksum_fixup; }; @@ -668,6 +675,33 @@ static int xennet_xdp_xmit(struct net_device *dev, int n, return nxmit; } +struct sk_buff *bounce_skb(const struct sk_buff *skb) +{ + unsigned int headerlen = skb_headroom(skb); + /* Align size to allocate full pages and avoid contiguous data leaks */ + unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len, + XEN_PAGE_SIZE); + struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO); + + if (!n) + return NULL; + + if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) { + WARN_ONCE(1, "misaligned skb allocated\n"); + kfree_skb(n); + return NULL; + } + + /* Set the data pointer */ + skb_reserve(n, headerlen); + /* Set the tail pointer and length */ + skb_put(n, skb->len); + + BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)); + + skb_copy_header(n, skb); + return n; +} #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1) @@ -721,9 +755,13 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev /* The first req should be at least ETH_HLEN size or the packet will be * dropped by netback. + * + * If the backend is not trusted bounce all data to zeroed pages to + * avoid exposing contiguous data on the granted page not belonging to + * the skb. */ - if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) { - nskb = skb_copy(skb, GFP_ATOMIC); + if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) { + nskb = bounce_skb(skb); if (!nskb) goto drop; dev_consume_skb_any(skb); @@ -2247,6 +2285,10 @@ static int talk_to_netback(struct xenbus_device *dev, info->netdev->irq = 0; + /* Check if backend is trusted. */ + info->bounce = !xennet_trusted || + !xenbus_read_unsigned(dev->nodename, "trusted", 1); + /* Check if backend supports multiple queues */ max_queues = xenbus_read_unsigned(info->xbdev->otherend, "multi-queue-max-queues", 1); @@ -2413,6 +2455,9 @@ static int xennet_connect(struct net_device *dev) return err; if (np->netback_has_xdp_headroom) pr_info("backend supports XDP headroom\n"); + if (np->bounce) + dev_info(&np->xbdev->dev, + "bouncing transmitted data to zeroed pages\n"); /* talk_to_netback() sets the correct number of queues */ num_queues = dev->real_num_tx_queues; From 6d0a9127279a4533815202e30ad1b3a39f560ba3 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Thu, 7 Apr 2022 13:04:24 +0200 Subject: [PATCH 0245/1056] xen/blkfront: force data bouncing when backend is untrusted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2400617da7eebf9167d71a46122828bc479d64c9 upstream. Split the current bounce buffering logic used with persistent grants into it's own option, and allow enabling it independently of persistent grants. This allows to reuse the same code paths to perform the bounce buffering required to avoid leaking contiguous data in shared pages not part of the request fragments. Reporting whether the backend is to be trusted can be done using a module parameter, or from the xenstore frontend path as set by the toolstack when adding the device. This is CVE-2022-33742, part of XSA-403. Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/block/xen-blkfront.c | 49 +++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 317b0b0994051b..f6f679702b832b 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -152,6 +152,10 @@ static unsigned int xen_blkif_max_ring_order; module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444); MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); +static bool __read_mostly xen_blkif_trusted = true; +module_param_named(trusted, xen_blkif_trusted, bool, 0644); +MODULE_PARM_DESC(trusted, "Is the backend trusted"); + #define BLK_RING_SIZE(info) \ __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages) @@ -209,6 +213,7 @@ struct blkfront_info unsigned int feature_discard:1; unsigned int feature_secdiscard:1; unsigned int feature_persistent:1; + unsigned int bounce:1; unsigned int discard_granularity; unsigned int discard_alignment; /* Number of 4KB segments handled */ @@ -311,7 +316,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num) if (!gnt_list_entry) goto out_of_memory; - if (info->feature_persistent) { + if (info->bounce) { granted_page = alloc_page(GFP_NOIO | __GFP_ZERO); if (!granted_page) { kfree(gnt_list_entry); @@ -331,7 +336,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num) list_for_each_entry_safe(gnt_list_entry, n, &rinfo->grants, node) { list_del(&gnt_list_entry->node); - if (info->feature_persistent) + if (info->bounce) __free_page(gnt_list_entry->page); kfree(gnt_list_entry); i--; @@ -377,7 +382,7 @@ static struct grant *get_grant(grant_ref_t *gref_head, /* Assign a gref to this page */ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); BUG_ON(gnt_list_entry->gref == -ENOSPC); - if (info->feature_persistent) + if (info->bounce) grant_foreign_access(gnt_list_entry, info); else { /* Grant access to the GFN passed by the caller */ @@ -401,7 +406,7 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head, /* Assign a gref to this page */ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); BUG_ON(gnt_list_entry->gref == -ENOSPC); - if (!info->feature_persistent) { + if (!info->bounce) { struct page *indirect_page; /* Fetch a pre-allocated page to use for indirect grefs */ @@ -703,7 +708,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri .grant_idx = 0, .segments = NULL, .rinfo = rinfo, - .need_copy = rq_data_dir(req) && info->feature_persistent, + .need_copy = rq_data_dir(req) && info->bounce, }; /* @@ -981,11 +986,12 @@ static void xlvbd_flush(struct blkfront_info *info) { blk_queue_write_cache(info->rq, info->feature_flush ? true : false, info->feature_fua ? true : false); - pr_info("blkfront: %s: %s %s %s %s %s\n", + pr_info("blkfront: %s: %s %s %s %s %s %s %s\n", info->gd->disk_name, flush_info(info), "persistent grants:", info->feature_persistent ? "enabled;" : "disabled;", "indirect descriptors:", - info->max_indirect_segments ? "enabled;" : "disabled;"); + info->max_indirect_segments ? "enabled;" : "disabled;", + "bounce buffer:", info->bounce ? "enabled" : "disabled;"); } static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) @@ -1212,7 +1218,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) if (!list_empty(&rinfo->indirect_pages)) { struct page *indirect_page, *n; - BUG_ON(info->feature_persistent); + BUG_ON(info->bounce); list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) { list_del(&indirect_page->lru); __free_page(indirect_page); @@ -1229,7 +1235,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) 0, 0UL); rinfo->persistent_gnts_c--; } - if (info->feature_persistent) + if (info->bounce) __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1250,7 +1256,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) for (j = 0; j < segs; j++) { persistent_gnt = rinfo->shadow[i].grants_used[j]; gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); - if (info->feature_persistent) + if (info->bounce) __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1440,7 +1446,7 @@ static int blkif_completion(unsigned long *id, data.s = s; num_sg = s->num_sg; - if (bret->operation == BLKIF_OP_READ && info->feature_persistent) { + if (bret->operation == BLKIF_OP_READ && info->bounce) { for_each_sg(s->sg, sg, num_sg, i) { BUG_ON(sg->offset + sg->length > PAGE_SIZE); @@ -1499,7 +1505,7 @@ static int blkif_completion(unsigned long *id, * Add the used indirect page back to the list of * available pages for indirect grefs. */ - if (!info->feature_persistent) { + if (!info->bounce) { indirect_page = s->indirect_grants[i]->page; list_add(&indirect_page->lru, &rinfo->indirect_pages); } @@ -1790,6 +1796,10 @@ static int talk_to_blkback(struct xenbus_device *dev, if (!info) return -ENODEV; + /* Check if backend is trusted. */ + info->bounce = !xen_blkif_trusted || + !xenbus_read_unsigned(dev->nodename, "trusted", 1); + max_page_order = xenbus_read_unsigned(info->xbdev->otherend, "max-ring-page-order", 0); ring_page_order = min(xen_blkif_max_ring_order, max_page_order); @@ -2199,10 +2209,10 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) if (err) goto out_of_memory; - if (!info->feature_persistent && info->max_indirect_segments) { + if (!info->bounce && info->max_indirect_segments) { /* - * We are using indirect descriptors but not persistent - * grants, we need to allocate a set of pages that can be + * We are using indirect descriptors but don't have a bounce + * buffer, we need to allocate a set of pages that can be * used for mapping indirect grefs */ int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info); @@ -2303,6 +2313,8 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) info->feature_persistent = !!xenbus_read_unsigned(info->xbdev->otherend, "feature-persistent", 0); + if (info->feature_persistent) + info->bounce = true; indirect_segments = xenbus_read_unsigned(info->xbdev->otherend, "feature-max-indirect-segments", 0); @@ -2566,6 +2578,13 @@ static void blkfront_delay_work(struct work_struct *work) struct blkfront_info *info; bool need_schedule_work = false; + /* + * Note that when using bounce buffers but not persistent grants + * there's no need to run blkfront_delay_work because grants are + * revoked in blkif_completion or else an error is reported and the + * connection is closed. + */ + mutex_lock(&blkfront_mutex); list_for_each_entry(info, &info_list, info_list) { From 1052fc2b7391a43b25168ae69ad658fff5170f04 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 1 Jul 2022 09:57:19 +0200 Subject: [PATCH 0246/1056] xen-netfront: restore __skb_queue_tail() positioning in xennet_get_responses() commit f63c2c2032c2e3caad9add3b82cc6e91c376fd26 upstream. The commit referenced below moved the invocation past the "next" label, without any explanation. In fact this allows misbehaving backends undue control over the domain the frontend runs in, as earlier detected errors require the skb to not be freed (it may be retained for later processing via xennet_move_rx_slot(), or it may simply be unsafe to have it freed). This is CVE-2022-33743 / XSA-405. Fixes: 6c5aa6fc4def ("xen networking: add basic XDP support for xen-netfront") Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 409ee9d97af48b..074dceb1930b35 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1094,8 +1094,10 @@ static int xennet_get_responses(struct netfront_queue *queue, } } rcu_read_unlock(); -next: + __skb_queue_tail(list, skb); + +next: if (!(rx->flags & XEN_NETRXF_more_data)) break; From 9f83c8f6ab14bbf4311b70bf1b7290d131059101 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Fri, 1 Jul 2022 09:57:42 +0200 Subject: [PATCH 0247/1056] xen/arm: Fix race in RB-tree based P2M accounting commit b75cd218274e01d026dc5240e86fdeb44bbed0c8 upstream. During the PV driver life cycle the mappings are added to the RB-tree by set_foreign_p2m_mapping(), which is called from gnttab_map_refs() and are removed by clear_foreign_p2m_mapping() which is called from gnttab_unmap_refs(). As both functions end up calling __set_phys_to_machine_multi() which updates the RB-tree, this function can be called concurrently. There is already a "p2m_lock" to protect against concurrent accesses, but the problem is that the first read of "phys_to_mach.rb_node" in __set_phys_to_machine_multi() is not covered by it, so this might lead to the incorrect mappings update (removing in our case) in RB-tree. In my environment the related issue happens rarely and only when PV net backend is running, the xen_add_phys_to_mach_entry() claims that it cannot add new pfn <-> mfn mapping to the tree since it is already exists which results in a failure when mapping foreign pages. But there might be other bad consequences related to the non-protected root reads such use-after-free, etc. While at it, also fix the similar usage in __pfn_to_mfn(), so initialize "struct rb_node *n" with the "p2m_lock" held in both functions to avoid possible bad consequences. This is CVE-2022-33744 / XSA-406. Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/arm/xen/p2m.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 84a1cea1f43b9a..309648c17f4868 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -63,11 +63,12 @@ static int xen_add_phys_to_mach_entry(struct xen_p2m_entry *new) unsigned long __pfn_to_mfn(unsigned long pfn) { - struct rb_node *n = phys_to_mach.rb_node; + struct rb_node *n; struct xen_p2m_entry *entry; unsigned long irqflags; read_lock_irqsave(&p2m_lock, irqflags); + n = phys_to_mach.rb_node; while (n) { entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); if (entry->pfn <= pfn && @@ -152,10 +153,11 @@ bool __set_phys_to_machine_multi(unsigned long pfn, int rc; unsigned long irqflags; struct xen_p2m_entry *p2m_entry; - struct rb_node *n = phys_to_mach.rb_node; + struct rb_node *n; if (mfn == INVALID_P2M_ENTRY) { write_lock_irqsave(&p2m_lock, irqflags); + n = phys_to_mach.rb_node; while (n) { p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); if (p2m_entry->pfn <= pfn && From 74acf9cc87c7e216a1308266253816e8a80d1818 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 10 Dec 2021 10:57:22 +0100 Subject: [PATCH 0248/1056] net: usb: qmi_wwan: add Telit 0x1070 composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 94f2a444f28a649926c410eb9a38afb13a83ebe0 upstream. Add the following Telit FN990 composition: 0x1070: tty, adb, rmnet, tty, tty, tty, tty Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20211210095722.22269-1-dnlplm@gmail.com Signed-off-by: Jakub Kicinski Cc: Fabio Porcedda Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 0c7f02ca6822bd..3e1aab1e894e3b 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1355,6 +1355,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ From 8169198652b9c02746dda463ba6311509ae27d0b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 30 Jun 2022 11:55:42 +0200 Subject: [PATCH 0249/1056] clocksource/drivers/ixp4xx: remove EXPORT_SYMBOL_GPL from ixp4xx_timer_setup() ixp4xx_timer_setup is exported, and so can not be an __init function. But it does not need to be exported as it is only called from one in-kernel function, so just remove the EXPORT_SYMBOL_GPL() marking to resolve the build warning. This is fixed "properly" in commit 41929c9f628b ("clocksource/drivers/ixp4xx: Drop boardfile probe path") but that can not be backported to older kernels as the reworking of the IXP4xx codebase is not suitable for stable releases. Cc: Linus Walleij Cc: Daniel Lezcano Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/timer-ixp4xx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/clocksource/timer-ixp4xx.c b/drivers/clocksource/timer-ixp4xx.c index cbb184953510b4..b8e92991c47194 100644 --- a/drivers/clocksource/timer-ixp4xx.c +++ b/drivers/clocksource/timer-ixp4xx.c @@ -282,7 +282,6 @@ void __init ixp4xx_timer_setup(resource_size_t timerbase, } ixp4xx_timer_register(base, timer_irq, timer_freq); } -EXPORT_SYMBOL_GPL(ixp4xx_timer_setup); #ifdef CONFIG_OF static __init int ixp4xx_of_timer_init(struct device_node *np) From 25daf14eacd1d6bd24afd879021d2447c5254c4e Mon Sep 17 00:00:00 2001 From: Eddie James Date: Wed, 21 Jul 2021 14:02:29 -0500 Subject: [PATCH 0250/1056] fsi: occ: Force sequence numbering per OCC commit 62f79f3d0eb9f4c224bcc3c7f6fa758515a0a7fa upstream. Set and increment the sequence number during the submit operation. This prevents sequence number conflicts between different users of the interface. A sequence number conflict may result in a user getting an OCC response meant for a different command. Since the sequence number is now modified, the checksum must be calculated and set before submitting the command. Signed-off-by: Eddie James Reviewed-by: Joel Stanley Link: https://lore.kernel.org/r/20210721190231.117185-2-eajames@linux.ibm.com Signed-off-by: Joel Stanley Signed-off-by: Greg Kroah-Hartman --- drivers/fsi/fsi-occ.c | 54 +++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/drivers/fsi/fsi-occ.c b/drivers/fsi/fsi-occ.c index b223f0ef337b99..ecf738411fe22e 100644 --- a/drivers/fsi/fsi-occ.c +++ b/drivers/fsi/fsi-occ.c @@ -50,6 +50,7 @@ struct occ { struct device *sbefifo; char name[32]; int idx; + u8 sequence_number; enum versions version; struct miscdevice mdev; struct mutex occ_lock; @@ -141,8 +142,7 @@ static ssize_t occ_write(struct file *file, const char __user *buf, { struct occ_client *client = file->private_data; size_t rlen, data_length; - u16 checksum = 0; - ssize_t rc, i; + ssize_t rc; u8 *cmd; if (!client) @@ -156,9 +156,6 @@ static ssize_t occ_write(struct file *file, const char __user *buf, /* Construct the command */ cmd = client->buffer; - /* Sequence number (we could increment and compare with response) */ - cmd[0] = 1; - /* * Copy the user command (assume user data follows the occ command * format) @@ -178,14 +175,7 @@ static ssize_t occ_write(struct file *file, const char __user *buf, goto done; } - /* Calculate checksum */ - for (i = 0; i < data_length + 4; ++i) - checksum += cmd[i]; - - cmd[data_length + 4] = checksum >> 8; - cmd[data_length + 5] = checksum & 0xFF; - - /* Submit command */ + /* Submit command; 4 bytes before the data and 2 bytes after */ rlen = PAGE_SIZE; rc = fsi_occ_submit(client->occ->dev, cmd, data_length + 6, cmd, &rlen); @@ -314,11 +304,13 @@ static int occ_getsram(struct occ *occ, u32 offset, void *data, ssize_t len) return rc; } -static int occ_putsram(struct occ *occ, const void *data, ssize_t len) +static int occ_putsram(struct occ *occ, const void *data, ssize_t len, + u8 seq_no, u16 checksum) { size_t cmd_len, buf_len, resp_len, resp_data_len; u32 data_len = ((len + 7) / 8) * 8; /* must be multiples of 8 B */ __be32 *buf; + u8 *byte_buf; int idx = 0, rc; cmd_len = (occ->version == occ_p10) ? 6 : 5; @@ -358,6 +350,15 @@ static int occ_putsram(struct occ *occ, const void *data, ssize_t len) buf[4 + idx] = cpu_to_be32(data_len); memcpy(&buf[5 + idx], data, len); + byte_buf = (u8 *)&buf[5 + idx]; + /* + * Overwrite the first byte with our sequence number and the last two + * bytes with the checksum. + */ + byte_buf[0] = seq_no; + byte_buf[len - 2] = checksum >> 8; + byte_buf[len - 1] = checksum & 0xff; + rc = sbefifo_submit(occ->sbefifo, buf, cmd_len, buf, &resp_len); if (rc) goto free; @@ -467,9 +468,12 @@ int fsi_occ_submit(struct device *dev, const void *request, size_t req_len, struct occ *occ = dev_get_drvdata(dev); struct occ_response *resp = response; u8 seq_no; + u16 checksum = 0; u16 resp_data_length; + const u8 *byte_request = (const u8 *)request; unsigned long start; int rc; + size_t i; if (!occ) return -ENODEV; @@ -479,11 +483,26 @@ int fsi_occ_submit(struct device *dev, const void *request, size_t req_len, return -EINVAL; } + /* Checksum the request, ignoring first byte (sequence number). */ + for (i = 1; i < req_len - 2; ++i) + checksum += byte_request[i]; + mutex_lock(&occ->occ_lock); - /* Extract the seq_no from the command (first byte) */ - seq_no = *(const u8 *)request; - rc = occ_putsram(occ, request, req_len); + /* + * Get a sequence number and update the counter. Avoid a sequence + * number of 0 which would pass the response check below even if the + * OCC response is uninitialized. Any sequence number the user is + * trying to send is overwritten since this function is the only common + * interface to the OCC and therefore the only place we can guarantee + * unique sequence numbers. + */ + seq_no = occ->sequence_number++; + if (!occ->sequence_number) + occ->sequence_number = 1; + checksum += seq_no; + + rc = occ_putsram(occ, request, req_len, seq_no, checksum); if (rc) goto done; @@ -574,6 +593,7 @@ static int occ_probe(struct platform_device *pdev) occ->version = (uintptr_t)of_device_get_match_data(dev); occ->dev = dev; occ->sbefifo = dev->parent; + occ->sequence_number = 1; mutex_init(&occ->occ_lock); if (dev->of_node) { From 4dc036ddf4bf224088036b856d6ff555fdca180e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Jun 2022 16:32:32 +0300 Subject: [PATCH 0251/1056] net: fix IFF_TX_SKB_NO_LINEAR definition [ Upstream commit 3b89b511ea0c705cc418440e2abf9d692a556d84 ] The "1<<31" shift has a sign extension bug so IFF_TX_SKB_NO_LINEAR is 0xffffffff80000000 instead of 0x0000000080000000. Fixes: c2ff53d8049f ("net: Add priv_flags for allow tx skb without linear") Signed-off-by: Dan Carpenter Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/YrRrcGttfEVnf85Q@kili Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 39f1893ecac034..f8d46dc62d6582 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1645,7 +1645,7 @@ enum netdev_priv_flags { IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_LIVE_RENAME_OK = 1<<30, - IFF_TX_SKB_NO_LINEAR = 1<<31, + IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN From a13ea254268c5538f4c2f1a1a344ad07cd7b87d3 Mon Sep 17 00:00:00 2001 From: katrinzhou Date: Tue, 21 Jun 2022 13:49:26 +0100 Subject: [PATCH 0252/1056] drm/i915/gem: add missing else [ Upstream commit 9efdd519d001ee3e761f6ff80d5eb123387421c1 ] Add missing else in set_proto_ctx_param() to fix coverity issue. Addresses-Coverity: ("Unused value") Fixes: d4433c7600f7 ("drm/i915/gem: Use the proto-context to handle create parameters (v5)") Suggested-by: Tvrtko Ursulin Signed-off-by: katrinzhou [tursulin: fixup alignment] Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220621124926.615884-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 7482a65664c16cc88eb84d2b545a1fed887378a1) Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 166bb46408a9bd..ee0c0b712522ff 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -720,8 +720,9 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, case I915_CONTEXT_PARAM_PERSISTENCE: if (args->size) ret = -EINVAL; - ret = proto_context_set_persistence(fpriv->dev_priv, pc, - args->value); + else + ret = proto_context_set_persistence(fpriv->dev_priv, pc, + args->value); break; case I915_CONTEXT_PARAM_NO_ZEROMAP: From 68aa6f13dc43fc6e5572e5bdaac0eb24475d15aa Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 24 Jun 2022 11:45:28 -0700 Subject: [PATCH 0253/1056] drm/msm/gem: Fix error return on fence id alloc fail [ Upstream commit 08de214138cdea438a0dfcb10d355a6650c6017c ] This was a typo, we didn't actually want to return zero. Fixes: a61acbbe9cf8 ("drm/msm: Track "seqno" fences by idr") Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/491145/ Link: https://lore.kernel.org/r/20220624184528.4036837-1-robdclark@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/msm_gem_submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 7fb7ff043bcd79..1f74bab9e231aa 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -889,7 +889,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit->fence_id = idr_alloc_cyclic(&queue->fence_idr, submit->user_fence, 1, INT_MAX, GFP_KERNEL); if (submit->fence_id < 0) { - ret = submit->fence_id = 0; + ret = submit->fence_id; submit->fence_id = 0; goto out; } From 8547315c1377fe66937dd5eb926c051ead00a73f Mon Sep 17 00:00:00 2001 From: Liang He Date: Wed, 15 Jun 2022 17:48:07 +0800 Subject: [PATCH 0254/1056] drivers: cpufreq: Add missing of_node_put() in qoriq-cpufreq.c [ Upstream commit 4ff5a9b6d95f3524bf6d27147df497eb21968300 ] In qoriq_cpufreq_probe(), of_find_matching_node() will return a node pointer with refcount incremented. We should use of_node_put() when it is not used anymore. Fixes: 157f527639da ("cpufreq: qoriq: convert to a platform driver") [ Viresh: Fixed Author's name in commit log ] Signed-off-by: Liang He Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/qoriq-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index 6b6b20da2bcfc8..573b417e148330 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -275,6 +275,7 @@ static int qoriq_cpufreq_probe(struct platform_device *pdev) np = of_find_matching_node(NULL, qoriq_cpufreq_blacklist); if (np) { + of_node_put(np); dev_info(&pdev->dev, "Disabling due to erratum A-008083"); return -ENODEV; } From b619348d9d698b688088868ff125ee3937b0a3c6 Mon Sep 17 00:00:00 2001 From: Stefan Seyfried Date: Fri, 24 Jun 2022 13:23:35 +0200 Subject: [PATCH 0255/1056] platform/x86: panasonic-laptop: de-obfuscate button codes [ Upstream commit 65a3e6c8d3f7c346813a05f3d76fc46b640d76d6 ] In the definition of panasonic_keymap[] the key codes are given in decimal, later checks are done with hexadecimal values, which does not help in understanding the code. Additionally use two helper variables to shorten the code and make the logic more obvious. Fixes: ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug") Signed-off-by: Stefan Seyfried Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220624112340.10130-3-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/panasonic-laptop.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index d4f444401496e8..84c16d9d9f8efc 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -762,6 +762,8 @@ static void acpi_pcc_generate_keyinput(struct pcc_acpi *pcc) struct input_dev *hotk_input_dev = pcc->input_dev; int rc; unsigned long long result; + unsigned int key; + unsigned int updown; rc = acpi_evaluate_integer(pcc->handle, METHOD_HKEY_QUERY, NULL, &result); @@ -770,18 +772,22 @@ static void acpi_pcc_generate_keyinput(struct pcc_acpi *pcc) return; } + key = result & 0xf; + updown = result & 0x80; /* 0x80 == key down; 0x00 = key up */ + /* hack: some firmware sends no key down for sleep / hibernate */ - if ((result & 0xf) == 0x7 || (result & 0xf) == 0xa) { - if (result & 0x80) + if (key == 7 || key == 10) { + if (updown) sleep_keydown_seen = 1; if (!sleep_keydown_seen) sparse_keymap_report_event(hotk_input_dev, - result & 0xf, 0x80, false); + key, 0x80, false); } - if ((result & 0xf) == 0x7 || (result & 0xf) == 0x9 || (result & 0xf) == 0xa) { + /* for the magic values, see panasonic_keymap[] above */ + if (key == 7 || key == 9 || key == 10) { if (!sparse_keymap_report_event(hotk_input_dev, - result & 0xf, result & 0x80, false)) + key, updown, false)) pr_err("Unknown hotkey event: 0x%04llx\n", result); } } From 484e10843a748e8cbb10531785859d48eaac9d7b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 24 Jun 2022 13:23:36 +0200 Subject: [PATCH 0256/1056] platform/x86: panasonic-laptop: sort includes alphabetically [ Upstream commit fe4326c8d18dc8a54affdc9ab269ad92dafef659 ] Sort includes alphabetically, small cleanup patch in preparation of further changes. Fixes: ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug") Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220624112340.10130-4-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/panasonic-laptop.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index 84c16d9d9f8efc..b89fbbc2fd081f 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -119,20 +119,19 @@ * - v0.1 start from toshiba_acpi driver written by John Belmonte */ -#include -#include -#include -#include +#include #include #include -#include -#include -#include -#include +#include #include #include +#include +#include #include - +#include +#include +#include +#include MODULE_AUTHOR("Hiroshi Miura "); MODULE_AUTHOR("David Bronaugh "); From 6201123ca5bc9481b84f8db2c65a0bbcad32ba14 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 24 Jun 2022 13:23:37 +0200 Subject: [PATCH 0257/1056] platform/x86: panasonic-laptop: revert "Resolve hotkey double trigger bug" [ Upstream commit 83a5ddc3dc561c40d948b85553514aaba99123d8 ] In hindsight blindly throwing away most of the key-press events is not a good idea. So revert commit ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug"). Fixes: ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug") Reported-and-tested-by: Stefan Seyfried Reported-and-tested-by: Kenneth Chan Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220624112340.10130-5-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/panasonic-laptop.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index b89fbbc2fd081f..cd3c23593eee94 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -783,12 +783,8 @@ static void acpi_pcc_generate_keyinput(struct pcc_acpi *pcc) key, 0x80, false); } - /* for the magic values, see panasonic_keymap[] above */ - if (key == 7 || key == 9 || key == 10) { - if (!sparse_keymap_report_event(hotk_input_dev, - key, updown, false)) - pr_err("Unknown hotkey event: 0x%04llx\n", result); - } + if (!sparse_keymap_report_event(hotk_input_dev, key, updown, false)) + pr_err("Unknown hotkey event: 0x%04llx\n", result); } static void acpi_pcc_hotkey_notify(struct acpi_device *device, u32 event) From b9b7a115dfd16f22f46f14a2b419333d62229d9c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 24 Jun 2022 13:23:38 +0200 Subject: [PATCH 0258/1056] platform/x86: panasonic-laptop: don't report duplicate brightness key-presses [ Upstream commit 1f2c9de83a50447a2d7166f6273ab0c0e97cd68e ] The brightness key-presses might also get reported by the ACPI video bus, check for this and in this case don't report the presses to avoid reporting 2 presses for a single key-press. Fixes: ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug") Reported-and-tested-by: Stefan Seyfried Reported-and-tested-by: Kenneth Chan Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220624112340.10130-6-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/Kconfig | 1 + drivers/platform/x86/panasonic-laptop.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index e21ea3d23e6f22..50a5c4f3cefdee 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -871,6 +871,7 @@ config PANASONIC_LAPTOP tristate "Panasonic Laptop Extras" depends on INPUT && ACPI depends on BACKLIGHT_CLASS_DEVICE + depends on ACPI_VIDEO=n || ACPI_VIDEO select INPUT_SPARSEKMAP help This driver adds support for access to backlight control and hotkeys diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index cd3c23593eee94..65ca863ffb9f7b 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -132,6 +132,7 @@ #include #include #include +#include MODULE_AUTHOR("Hiroshi Miura "); MODULE_AUTHOR("David Bronaugh "); @@ -783,6 +784,13 @@ static void acpi_pcc_generate_keyinput(struct pcc_acpi *pcc) key, 0x80, false); } + /* + * Don't report brightness key-presses if they are also reported + * by the ACPI video bus. + */ + if ((key == 1 || key == 2) && acpi_video_handles_brightness_key_presses()) + return; + if (!sparse_keymap_report_event(hotk_input_dev, key, updown, false)) pr_err("Unknown hotkey event: 0x%04llx\n", result); } From f2def264335018e76c97f7a97a80ecc4b6abb9e6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 24 Jun 2022 13:23:39 +0200 Subject: [PATCH 0259/1056] platform/x86: panasonic-laptop: filter out duplicate volume up/down/mute keypresses [ Upstream commit aacb455dfe01b7a24a792a2fbe7a04112ce8321d ] On some Panasonic models the volume up/down/mute keypresses get reported both through the Panasonic ACPI HKEY interface as well as through the atkbd device. Filter out the atkbd scan-codes for these to avoid reporting presses twice. Note normally we would leave the filtering of these to userspace by mapping the scan-codes to KEY_UNKNOWN through /lib/udev/hwdb.d/60-keyboard.hwdb. However in this case that would cause regressions since we were filtering the Panasonic ACPI HKEY events before, so filter these in the kernel. Fixes: ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug") Reported-and-tested-by: Stefan Seyfried Reported-and-tested-by: Kenneth Chan Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220624112340.10130-7-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/Kconfig | 1 + drivers/platform/x86/panasonic-laptop.c | 41 +++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 50a5c4f3cefdee..f1ff003bb14bc8 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -872,6 +872,7 @@ config PANASONIC_LAPTOP depends on INPUT && ACPI depends on BACKLIGHT_CLASS_DEVICE depends on ACPI_VIDEO=n || ACPI_VIDEO + depends on SERIO_I8042 || SERIO_I8042 = n select INPUT_SPARSEKMAP help This driver adds support for access to backlight control and hotkeys diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index 65ca863ffb9f7b..7ca49b3fc6c287 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -122,6 +122,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +130,7 @@ #include #include #include +#include #include #include #include @@ -241,6 +243,42 @@ struct pcc_acpi { struct platform_device *platform; }; +/* + * On some Panasonic models the volume up / down / mute keys send duplicate + * keypress events over the PS/2 kbd interface, filter these out. + */ +static bool panasonic_i8042_filter(unsigned char data, unsigned char str, + struct serio *port) +{ + static bool extended; + + if (str & I8042_STR_AUXDATA) + return false; + + if (data == 0xe0) { + extended = true; + return true; + } else if (extended) { + extended = false; + + switch (data & 0x7f) { + case 0x20: /* e0 20 / e0 a0, Volume Mute press / release */ + case 0x2e: /* e0 2e / e0 ae, Volume Down press / release */ + case 0x30: /* e0 30 / e0 b0, Volume Up press / release */ + return true; + default: + /* + * Report the previously filtered e0 before continuing + * with the next non-filtered byte. + */ + serio_interrupt(port, 0xe0, 0); + return false; + } + } + + return false; +} + /* method access functions */ static int acpi_pcc_write_sset(struct pcc_acpi *pcc, int func, int val) { @@ -1006,6 +1044,7 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device) pcc->platform = NULL; } + i8042_install_filter(panasonic_i8042_filter); return 0; out_platform: @@ -1029,6 +1068,8 @@ static int acpi_pcc_hotkey_remove(struct acpi_device *device) if (!device || !pcc) return -EINVAL; + i8042_remove_filter(panasonic_i8042_filter); + if (pcc->platform) { device_remove_file(&pcc->platform->dev, &dev_attr_cdpower); platform_device_unregister(pcc->platform); From 5b458d3de9cfac4a21b704c90c8c7eff244c8b13 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 21 Jun 2022 20:39:21 +0000 Subject: [PATCH 0260/1056] drm/fourcc: fix integer type usage in uapi header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 20b8264394b33adb1640a485a62a84bc1388b6a3 ] Kernel uapi headers are supposed to use __[us]{8,16,32,64} types defined by as opposed to 'uint32_t' and similar. See [1] for the relevant discussion about this topic. In this particular case, the usage of 'uint64_t' escaped headers_check as these macros are not being called here. However, the following program triggers a compilation error: #include int main() { unsigned long x = AMD_FMT_MOD_CLEAR(RB); return 0; } gcc error: drm.c:5:27: error: ‘uint64_t’ undeclared (first use in this function) 5 | unsigned long x = AMD_FMT_MOD_CLEAR(RB); | ^~~~~~~~~~~~~~~~~ This patch changes AMD_FMT_MOD_{SET,CLEAR} macros to use the correct integer types, which fixes the above issue. [1] https://lkml.org/lkml/2019/6/5/18 Fixes: 8ba16d599374 ("drm/fourcc: Add AMD DRM modifiers.") Signed-off-by: Carlos Llamas Reviewed-by: Simon Ser Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- include/uapi/drm/drm_fourcc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 9f4bb4a6f358c6..808c73c52820fb 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1352,11 +1352,11 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define AMD_FMT_MOD_PIPE_MASK 0x7 #define AMD_FMT_MOD_SET(field, value) \ - ((uint64_t)(value) << AMD_FMT_MOD_##field##_SHIFT) + ((__u64)(value) << AMD_FMT_MOD_##field##_SHIFT) #define AMD_FMT_MOD_GET(field, value) \ (((value) >> AMD_FMT_MOD_##field##_SHIFT) & AMD_FMT_MOD_##field##_MASK) #define AMD_FMT_MOD_CLEAR(field) \ - (~((uint64_t)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT)) + (~((__u64)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT)) #if defined(__cplusplus) } From 38920480329f99070f8cf2baacf6380409b9b810 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Wed, 21 Jul 2021 14:02:30 -0500 Subject: [PATCH 0261/1056] hwmon: (occ) Remove sequence numbering and checksum calculation [ Upstream commit 908dbf0242e21dd95c69a1b0935814cd1abfc134 ] Checksumming of the request and sequence numbering is now done in the OCC interface driver in order to keep unique sequence numbers. So remove those in the hwmon driver. Also, add the command length to the send_cmd function pointer, since the checksum must be placed in the last two bytes of the command. The submit interface must receive the exact size of the command - previously it could be rounded to the nearest 8 bytes with no consequence. Signed-off-by: Eddie James Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/20210721190231.117185-3-eajames@linux.ibm.com Signed-off-by: Joel Stanley Signed-off-by: Sasha Levin --- drivers/hwmon/occ/common.c | 30 ++++++++++++------------------ drivers/hwmon/occ/common.h | 3 +-- drivers/hwmon/occ/p8_i2c.c | 15 +++++++++------ drivers/hwmon/occ/p9_sbe.c | 4 ++-- 4 files changed, 24 insertions(+), 28 deletions(-) diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index ae664613289c45..0cb4a0a6cbc10d 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -132,22 +132,20 @@ struct extended_sensor { static int occ_poll(struct occ *occ) { int rc; - u16 checksum = occ->poll_cmd_data + occ->seq_no + 1; - u8 cmd[8]; + u8 cmd[7]; struct occ_poll_response_header *header; /* big endian */ - cmd[0] = occ->seq_no++; /* sequence number */ + cmd[0] = 0; /* sequence number */ cmd[1] = 0; /* cmd type */ cmd[2] = 0; /* data length msb */ cmd[3] = 1; /* data length lsb */ cmd[4] = occ->poll_cmd_data; /* data */ - cmd[5] = checksum >> 8; /* checksum msb */ - cmd[6] = checksum & 0xFF; /* checksum lsb */ - cmd[7] = 0; + cmd[5] = 0; /* checksum msb */ + cmd[6] = 0; /* checksum lsb */ /* mutex should already be locked if necessary */ - rc = occ->send_cmd(occ, cmd); + rc = occ->send_cmd(occ, cmd, sizeof(cmd)); if (rc) { occ->last_error = rc; if (occ->error_count++ > OCC_ERROR_COUNT_THRESHOLD) @@ -184,25 +182,23 @@ static int occ_set_user_power_cap(struct occ *occ, u16 user_power_cap) { int rc; u8 cmd[8]; - u16 checksum = 0x24; __be16 user_power_cap_be = cpu_to_be16(user_power_cap); - cmd[0] = 0; - cmd[1] = 0x22; - cmd[2] = 0; - cmd[3] = 2; + cmd[0] = 0; /* sequence number */ + cmd[1] = 0x22; /* cmd type */ + cmd[2] = 0; /* data length msb */ + cmd[3] = 2; /* data length lsb */ memcpy(&cmd[4], &user_power_cap_be, 2); - checksum += cmd[4] + cmd[5]; - cmd[6] = checksum >> 8; - cmd[7] = checksum & 0xFF; + cmd[6] = 0; /* checksum msb */ + cmd[7] = 0; /* checksum lsb */ rc = mutex_lock_interruptible(&occ->lock); if (rc) return rc; - rc = occ->send_cmd(occ, cmd); + rc = occ->send_cmd(occ, cmd, sizeof(cmd)); mutex_unlock(&occ->lock); @@ -1144,8 +1140,6 @@ int occ_setup(struct occ *occ, const char *name) { int rc; - /* start with 1 to avoid false match with zero-initialized SRAM buffer */ - occ->seq_no = 1; mutex_init(&occ->lock); occ->groups[0] = &occ->group; diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h index e6df719770e812..5020117be740f3 100644 --- a/drivers/hwmon/occ/common.h +++ b/drivers/hwmon/occ/common.h @@ -95,9 +95,8 @@ struct occ { struct occ_sensors sensors; int powr_sample_time_us; /* average power sample time */ - u8 seq_no; u8 poll_cmd_data; /* to perform OCC poll command */ - int (*send_cmd)(struct occ *occ, u8 *cmd); + int (*send_cmd)(struct occ *occ, u8 *cmd, size_t len); unsigned long next_update; struct mutex lock; /* lock OCC access */ diff --git a/drivers/hwmon/occ/p8_i2c.c b/drivers/hwmon/occ/p8_i2c.c index 0cf8588be35acf..9e61e1fb5142cf 100644 --- a/drivers/hwmon/occ/p8_i2c.c +++ b/drivers/hwmon/occ/p8_i2c.c @@ -97,18 +97,21 @@ static int p8_i2c_occ_putscom_u32(struct i2c_client *client, u32 address, } static int p8_i2c_occ_putscom_be(struct i2c_client *client, u32 address, - u8 *data) + u8 *data, size_t len) { - __be32 data0, data1; + __be32 data0 = 0, data1 = 0; - memcpy(&data0, data, 4); - memcpy(&data1, data + 4, 4); + memcpy(&data0, data, min_t(size_t, len, 4)); + if (len > 4) { + len -= 4; + memcpy(&data1, data + 4, min_t(size_t, len, 4)); + } return p8_i2c_occ_putscom_u32(client, address, be32_to_cpu(data0), be32_to_cpu(data1)); } -static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd) +static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len) { int i, rc; unsigned long start; @@ -127,7 +130,7 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd) return rc; /* write command (expected to already be BE), we need bus-endian... */ - rc = p8_i2c_occ_putscom_be(client, OCB_DATA3, cmd); + rc = p8_i2c_occ_putscom_be(client, OCB_DATA3, cmd, len); if (rc) return rc; diff --git a/drivers/hwmon/occ/p9_sbe.c b/drivers/hwmon/occ/p9_sbe.c index f6387cc0b75407..9709f2b9c052a3 100644 --- a/drivers/hwmon/occ/p9_sbe.c +++ b/drivers/hwmon/occ/p9_sbe.c @@ -16,14 +16,14 @@ struct p9_sbe_occ { #define to_p9_sbe_occ(x) container_of((x), struct p9_sbe_occ, occ) -static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd) +static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len) { struct occ_response *resp = &occ->resp; struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ); size_t resp_len = sizeof(*resp); int rc; - rc = fsi_occ_submit(ctx->sbe, cmd, 8, resp, &resp_len); + rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len); if (rc < 0) return rc; From 8848842f0a9be483af03ff9fda0eca27bcdd3a5c Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 28 Jun 2022 15:30:29 -0500 Subject: [PATCH 0262/1056] hwmon: (occ) Prevent power cap command overwriting poll response [ Upstream commit 1bbb2809040a1f9c7c53c9f06c21aa83275ed27b ] Currently, the response to the power cap command overwrites the first eight bytes of the poll response, since the commands use the same buffer. This means that user's get the wrong data between the time of sending the power cap and the next poll response update. Fix this by specifying a different buffer for the power cap command response. Fixes: 5b5513b88002 ("hwmon: Add On-Chip Controller (OCC) hwmon driver") Signed-off-by: Eddie James Link: https://lore.kernel.org/r/20220628203029.51747-1-eajames@linux.ibm.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/occ/common.c | 5 +++-- drivers/hwmon/occ/common.h | 3 ++- drivers/hwmon/occ/p8_i2c.c | 13 +++++++------ drivers/hwmon/occ/p9_sbe.c | 7 +++---- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index 0cb4a0a6cbc10d..bbe5e4ef4113cb 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -145,7 +145,7 @@ static int occ_poll(struct occ *occ) cmd[6] = 0; /* checksum lsb */ /* mutex should already be locked if necessary */ - rc = occ->send_cmd(occ, cmd, sizeof(cmd)); + rc = occ->send_cmd(occ, cmd, sizeof(cmd), &occ->resp, sizeof(occ->resp)); if (rc) { occ->last_error = rc; if (occ->error_count++ > OCC_ERROR_COUNT_THRESHOLD) @@ -182,6 +182,7 @@ static int occ_set_user_power_cap(struct occ *occ, u16 user_power_cap) { int rc; u8 cmd[8]; + u8 resp[8]; __be16 user_power_cap_be = cpu_to_be16(user_power_cap); cmd[0] = 0; /* sequence number */ @@ -198,7 +199,7 @@ static int occ_set_user_power_cap(struct occ *occ, u16 user_power_cap) if (rc) return rc; - rc = occ->send_cmd(occ, cmd, sizeof(cmd)); + rc = occ->send_cmd(occ, cmd, sizeof(cmd), resp, sizeof(resp)); mutex_unlock(&occ->lock); diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h index 5020117be740f3..7abf191020628e 100644 --- a/drivers/hwmon/occ/common.h +++ b/drivers/hwmon/occ/common.h @@ -96,7 +96,8 @@ struct occ { int powr_sample_time_us; /* average power sample time */ u8 poll_cmd_data; /* to perform OCC poll command */ - int (*send_cmd)(struct occ *occ, u8 *cmd, size_t len); + int (*send_cmd)(struct occ *occ, u8 *cmd, size_t len, void *resp, + size_t resp_len); unsigned long next_update; struct mutex lock; /* lock OCC access */ diff --git a/drivers/hwmon/occ/p8_i2c.c b/drivers/hwmon/occ/p8_i2c.c index 9e61e1fb5142cf..c35c07964d856c 100644 --- a/drivers/hwmon/occ/p8_i2c.c +++ b/drivers/hwmon/occ/p8_i2c.c @@ -111,7 +111,8 @@ static int p8_i2c_occ_putscom_be(struct i2c_client *client, u32 address, be32_to_cpu(data1)); } -static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len) +static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len, + void *resp, size_t resp_len) { int i, rc; unsigned long start; @@ -120,7 +121,7 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len) const long wait_time = msecs_to_jiffies(OCC_CMD_IN_PRG_WAIT_MS); struct p8_i2c_occ *ctx = to_p8_i2c_occ(occ); struct i2c_client *client = ctx->client; - struct occ_response *resp = &occ->resp; + struct occ_response *or = (struct occ_response *)resp; start = jiffies; @@ -151,7 +152,7 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len) return rc; /* wait for OCC */ - if (resp->return_status == OCC_RESP_CMD_IN_PRG) { + if (or->return_status == OCC_RESP_CMD_IN_PRG) { rc = -EALREADY; if (time_after(jiffies, start + timeout)) @@ -163,7 +164,7 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len) } while (rc); /* check the OCC response */ - switch (resp->return_status) { + switch (or->return_status) { case OCC_RESP_CMD_IN_PRG: rc = -ETIMEDOUT; break; @@ -192,8 +193,8 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len) if (rc < 0) return rc; - data_length = get_unaligned_be16(&resp->data_length); - if (data_length > OCC_RESP_DATA_BYTES) + data_length = get_unaligned_be16(&or->data_length); + if ((data_length + 7) > resp_len) return -EMSGSIZE; /* fetch the rest of the response data */ diff --git a/drivers/hwmon/occ/p9_sbe.c b/drivers/hwmon/occ/p9_sbe.c index 9709f2b9c052a3..14923e78e1f324 100644 --- a/drivers/hwmon/occ/p9_sbe.c +++ b/drivers/hwmon/occ/p9_sbe.c @@ -16,18 +16,17 @@ struct p9_sbe_occ { #define to_p9_sbe_occ(x) container_of((x), struct p9_sbe_occ, occ) -static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len) +static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len, + void *resp, size_t resp_len) { - struct occ_response *resp = &occ->resp; struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ); - size_t resp_len = sizeof(*resp); int rc; rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len); if (rc < 0) return rc; - switch (resp->return_status) { + switch (((struct occ_response *)resp)->return_status) { case OCC_RESP_CMD_IN_PRG: rc = -ETIMEDOUT; break; From 6b316eedff44f73f55299e0a812a0e021d5e95eb Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 1 Jul 2022 15:41:53 +0800 Subject: [PATCH 0263/1056] hwmon: (ibmaem) don't call platform_device_del() if platform_device_add() fails [ Upstream commit d0e51022a025ca5350fafb8e413a6fe5d4baf833 ] If platform_device_add() fails, it no need to call platform_device_del(), split platform_device_unregister() into platform_device_del/put(), so platform_device_put() can be called separately. Fixes: 8808a793f052 ("ibmaem: new driver for power/energy/temp meters in IBM System X hardware") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220701074153.4021556-1-yangyingliang@huawei.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/ibmaem.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/ibmaem.c b/drivers/hwmon/ibmaem.c index a4ec85207782d8..2e6d6a5cffa164 100644 --- a/drivers/hwmon/ibmaem.c +++ b/drivers/hwmon/ibmaem.c @@ -550,7 +550,7 @@ static int aem_init_aem1_inst(struct aem_ipmi_data *probe, u8 module_handle) res = platform_device_add(data->pdev); if (res) - goto ipmi_err; + goto dev_add_err; platform_set_drvdata(data->pdev, data); @@ -598,7 +598,9 @@ static int aem_init_aem1_inst(struct aem_ipmi_data *probe, u8 module_handle) ipmi_destroy_user(data->ipmi.user); ipmi_err: platform_set_drvdata(data->pdev, NULL); - platform_device_unregister(data->pdev); + platform_device_del(data->pdev); +dev_add_err: + platform_device_put(data->pdev); dev_err: ida_simple_remove(&aem_ida, data->id); id_err: @@ -690,7 +692,7 @@ static int aem_init_aem2_inst(struct aem_ipmi_data *probe, res = platform_device_add(data->pdev); if (res) - goto ipmi_err; + goto dev_add_err; platform_set_drvdata(data->pdev, data); @@ -738,7 +740,9 @@ static int aem_init_aem2_inst(struct aem_ipmi_data *probe, ipmi_destroy_user(data->ipmi.user); ipmi_err: platform_set_drvdata(data->pdev, NULL); - platform_device_unregister(data->pdev); + platform_device_del(data->pdev); +dev_add_err: + platform_device_put(data->pdev); dev_err: ida_simple_remove(&aem_ida, data->id); id_err: From eb18ccd146339b58faa47c1837f1b61ba05a1acf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 7 Jul 2022 17:53:35 +0200 Subject: [PATCH 0264/1056] Linux 5.15.53 Link: https://lore.kernel.org/r/20220705115617.568350164@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Florian Fainelli Tested-by: Linux Kernel Functional Testing Tested-by: Bagas Sanjaya Tested-by: Ron Economos Tested-by: Sudip Mukherjee Tested-by: Guenter Roeck Tested-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 777e0a0eeccd11..c7750d260a551a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 52 +SUBLEVEL = 53 EXTRAVERSION = NAME = Trick or Treat From 0515cc9b6b24877f59b222ade704bfaa42caa2a6 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 8 Jun 2022 20:22:05 +0200 Subject: [PATCH 0265/1056] mm/slub: add missing TID updates on slab deactivation commit eeaa345e128515135ccb864c04482180c08e3259 upstream. The fastpath in slab_alloc_node() assumes that c->slab is stable as long as the TID stays the same. However, two places in __slab_alloc() currently don't update the TID when deactivating the CPU slab. If multiple operations race the right way, this could lead to an object getting lost; or, in an even more unlikely situation, it could even lead to an object being freed onto the wrong slab's freelist, messing up the `inuse` counter and eventually causing a page to be freed to the page allocator while it still contains slab objects. (I haven't actually tested these cases though, this is just based on looking at the code. Writing testcases for this stuff seems like it'd be a pain...) The race leading to state inconsistency is (all operations on the same CPU and kmem_cache): - task A: begin do_slab_free(): - read TID - read pcpu freelist (==NULL) - check `slab == c->slab` (true) - [PREEMPT A->B] - task B: begin slab_alloc_node(): - fastpath fails (`c->freelist` is NULL) - enter __slab_alloc() - slub_get_cpu_ptr() (disables preemption) - enter ___slab_alloc() - take local_lock_irqsave() - read c->freelist as NULL - get_freelist() returns NULL - write `c->slab = NULL` - drop local_unlock_irqrestore() - goto new_slab - slub_percpu_partial() is NULL - get_partial() returns NULL - slub_put_cpu_ptr() (enables preemption) - [PREEMPT B->A] - task A: finish do_slab_free(): - this_cpu_cmpxchg_double() succeeds() - [CORRUPT STATE: c->slab==NULL, c->freelist!=NULL] From there, the object on c->freelist will get lost if task B is allowed to continue from here: It will proceed to the retry_load_slab label, set c->slab, then jump to load_freelist, which clobbers c->freelist. But if we instead continue as follows, we get worse corruption: - task A: run __slab_free() on object from other struct slab: - CPU_PARTIAL_FREE case (slab was on no list, is now on pcpu partial) - task A: run slab_alloc_node() with NUMA node constraint: - fastpath fails (c->slab is NULL) - call __slab_alloc() - slub_get_cpu_ptr() (disables preemption) - enter ___slab_alloc() - c->slab is NULL: goto new_slab - slub_percpu_partial() is non-NULL - set c->slab to slub_percpu_partial(c) - [CORRUPT STATE: c->slab points to slab-1, c->freelist has objects from slab-2] - goto redo - node_match() fails - goto deactivate_slab - existing c->freelist is passed into deactivate_slab() - inuse count of slab-1 is decremented to account for object from slab-2 At this point, the inuse count of slab-1 is 1 lower than it should be. This means that if we free all allocated objects in slab-1 except for one, SLUB will think that slab-1 is completely unused, and may free its page, leading to use-after-free. Fixes: c17dda40a6a4e ("slub: Separate out kmem_cache_cpu processing from deactivate_slab") Fixes: 03e404af26dc2 ("slub: fast release on full slab") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Acked-by: Christoph Lameter Acked-by: David Rientjes Reviewed-by: Muchun Song Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/20220608182205.2945720-1-jannh@google.com Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index b75eebc0350e75..519bbbad7b2f6f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2935,6 +2935,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, if (!freelist) { c->page = NULL; + c->tid = next_tid(c->tid); local_unlock_irqrestore(&s->cpu_slab->lock, flags); stat(s, DEACTIVATE_BYPASS); goto new_slab; @@ -2967,6 +2968,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, freelist = c->freelist; c->page = NULL; c->freelist = NULL; + c->tid = next_tid(c->tid); local_unlock_irqrestore(&s->cpu_slab->lock, flags); deactivate_slab(s, page, freelist); From 066a5b6784727217e3a6453d5dbf88aaf6d69c66 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Thu, 7 Jul 2022 10:09:38 +0800 Subject: [PATCH 0266/1056] mm/filemap: fix UAF in find_lock_entries Release refcount after xas_set to fix UAF which may cause panic like this: page:ffffea000491fa40 refcount:1 mapcount:0 mapping:0000000000000000 index:0x1 pfn:0x1247e9 head:ffffea000491fa00 order:3 compound_mapcount:0 compound_pincount:0 memcg:ffff888104f91091 flags: 0x2fffff80010200(slab|head|node=0|zone=2|lastcpupid=0x1fffff) ... page dumped because: VM_BUG_ON_PAGE(PageTail(page)) ------------[ cut here ]------------ kernel BUG at include/linux/page-flags.h:632! invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN CPU: 1 PID: 7642 Comm: sh Not tainted 5.15.51-dirty #26 ... Call Trace: __invalidate_mapping_pages+0xe7/0x540 drop_pagecache_sb+0x159/0x320 iterate_supers+0x120/0x240 drop_caches_sysctl_handler+0xaa/0xe0 proc_sys_call_handler+0x2b4/0x480 new_sync_write+0x3d6/0x5c0 vfs_write+0x446/0x7a0 ksys_write+0x105/0x210 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f52b5733130 ... This problem has been fixed on mainline by patch 6b24ca4a1a8d ("mm: Use multi-index entries in the page cache") since it deletes the related code. Fixes: 5c211ba29deb ("mm: add and use find_lock_entries") Signed-off-by: Liu Shixin Acked-by: Matthew Wilcox (Oracle) Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 00e391e7588017..dbc461703ff454 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2090,7 +2090,11 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, rcu_read_lock(); while ((page = find_get_entry(&xas, end, XA_PRESENT))) { + unsigned long next_idx = xas.xa_index + 1; + if (!xa_is_value(page)) { + if (PageTransHuge(page)) + next_idx = page->index + thp_nr_pages(page); if (page->index < start) goto put; if (page->index + thp_nr_pages(page) - 1 > end) @@ -2111,13 +2115,11 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, put: put_page(page); next: - if (!xa_is_value(page) && PageTransHuge(page)) { - unsigned int nr_pages = thp_nr_pages(page); - + if (next_idx != xas.xa_index + 1) { /* Final THP may cross MAX_LFS_FILESIZE on 32-bit */ - xas_set(&xas, page->index + nr_pages); - if (xas.xa_index < nr_pages) + if (next_idx < xas.xa_index) break; + xas_set(&xas, next_idx); } } rcu_read_unlock(); From e63b94b8dd5ffeb9f8af615e9a6f52be44b9d237 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Thu, 7 Jul 2022 17:42:07 +0800 Subject: [PATCH 0267/1056] Revert "selftests/bpf: Add test for bpf_timer overwriting crash" This reverts commit b0028e1cc1faf2e5d88ad4065590aca90d650182 which is commit a7e75016a0753c24d6c995bc02501ae35368e333 upstream. It will break the bpf self-tests build with: progs/timer_crash.c:8:19: error: field has incomplete type 'struct bpf_timer' struct bpf_timer timer; ^ /home/ubuntu/linux/tools/testing/selftests/bpf/tools/include/bpf/bpf_helper_defs.h:39:8: note: forward declaration of 'struct bpf_timer' struct bpf_timer; ^ 1 error generated. This test can only be built with 5.17 and newer kernels. Signed-off-by: Po-Hsu Lin Signed-off-by: Greg Kroah-Hartman --- .../selftests/bpf/prog_tests/timer_crash.c | 32 ----------- .../testing/selftests/bpf/progs/timer_crash.c | 54 ------------------- 2 files changed, 86 deletions(-) delete mode 100644 tools/testing/selftests/bpf/prog_tests/timer_crash.c delete mode 100644 tools/testing/selftests/bpf/progs/timer_crash.c diff --git a/tools/testing/selftests/bpf/prog_tests/timer_crash.c b/tools/testing/selftests/bpf/prog_tests/timer_crash.c deleted file mode 100644 index f74b82305da8c8..00000000000000 --- a/tools/testing/selftests/bpf/prog_tests/timer_crash.c +++ /dev/null @@ -1,32 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include "timer_crash.skel.h" - -enum { - MODE_ARRAY, - MODE_HASH, -}; - -static void test_timer_crash_mode(int mode) -{ - struct timer_crash *skel; - - skel = timer_crash__open_and_load(); - if (!ASSERT_OK_PTR(skel, "timer_crash__open_and_load")) - return; - skel->bss->pid = getpid(); - skel->bss->crash_map = mode; - if (!ASSERT_OK(timer_crash__attach(skel), "timer_crash__attach")) - goto end; - usleep(1); -end: - timer_crash__destroy(skel); -} - -void test_timer_crash(void) -{ - if (test__start_subtest("array")) - test_timer_crash_mode(MODE_ARRAY); - if (test__start_subtest("hash")) - test_timer_crash_mode(MODE_HASH); -} diff --git a/tools/testing/selftests/bpf/progs/timer_crash.c b/tools/testing/selftests/bpf/progs/timer_crash.c deleted file mode 100644 index f8f7944e70dae4..00000000000000 --- a/tools/testing/selftests/bpf/progs/timer_crash.c +++ /dev/null @@ -1,54 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include -#include - -struct map_elem { - struct bpf_timer timer; - struct bpf_spin_lock lock; -}; - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); - __type(key, int); - __type(value, struct map_elem); -} amap SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __uint(max_entries, 1); - __type(key, int); - __type(value, struct map_elem); -} hmap SEC(".maps"); - -int pid = 0; -int crash_map = 0; /* 0 for amap, 1 for hmap */ - -SEC("fentry/do_nanosleep") -int sys_enter(void *ctx) -{ - struct map_elem *e, value = {}; - void *map = crash_map ? (void *)&hmap : (void *)&amap; - - if (bpf_get_current_task_btf()->tgid != pid) - return 0; - - *(void **)&value = (void *)0xdeadcaf3; - - bpf_map_update_elem(map, &(int){0}, &value, 0); - /* For array map, doing bpf_map_update_elem will do a - * check_and_free_timer_in_array, which will trigger the crash if timer - * pointer was overwritten, for hmap we need to use bpf_timer_cancel. - */ - if (crash_map == 1) { - e = bpf_map_lookup_elem(map, &(int){0}); - if (!e) - return 0; - bpf_timer_cancel(&e->timer); - } - return 0; -} - -char _license[] SEC("license") = "GPL"; From f62c53c6e70d42aada4e1dd04f506821d51beee0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 24 Jun 2022 12:11:32 +0200 Subject: [PATCH 0268/1056] ALSA: usb-audio: Workarounds for Behringer UMC 204/404 HD commit ae8b1631561a3634cc09d0c62bbdd938eade05ec upstream. Both Behringer UMC 202 HD and 404 HD need explicit quirks to enable the implicit feedback mode and start the playback stream primarily. The former seems fixing the stuttering and the latter is required for a playback-only case. Note that the "clock source 41 is not valid" error message still appears even after this fix, but it should be only once at probe. The reason of the error is still unknown, but this seems to be mostly harmless as it's a one-off error and the driver retires the clock setup and it succeeds afterwards. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215934 Cc: Link: https://lore.kernel.org/r/20220624101132.14528-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index e8468f9b007d1f..12ce69b04f634c 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1842,6 +1842,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x1397, 0x0508, /* Behringer UMC204HD */ + QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x1397, 0x0509, /* Behringer UMC404HD */ + QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x13e5, 0x0001, /* Serato Phono */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x154e, 0x1002, /* Denon DCD-1500RE */ From f768f3ca5f386bbb1bdd8e3b323bd9047af4e839 Mon Sep 17 00:00:00 2001 From: Tim Crawford Date: Fri, 24 Jun 2022 08:41:09 -0600 Subject: [PATCH 0269/1056] ALSA: hda/realtek: Add quirk for Clevo L140PU commit 11bea26929a1a3a9dd1a287b60c2f471701bf706 upstream. Fixes headset detection on Clevo L140PU. Signed-off-by: Tim Crawford Cc: Link: https://lore.kernel.org/r/20220624144109.3957-1-tcrawford@system76.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 70664c9832d6fa..3c3eac2399da7d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9001,6 +9001,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x70f4, "Clevo NH77EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70f6, "Clevo NH77DPQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x7716, "Clevo NS50PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x7718, "Clevo L140PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8521, "Clevo NH77D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), From 51aab37a66a203ce14630f8dd191713a27a6f457 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 5 Jul 2022 17:23:36 +0200 Subject: [PATCH 0270/1056] ALSA: cs46xx: Fix missing snd_card_free() call at probe error commit c5e58c4545a69677d078b4c813b5d10d3481be9c upstream. The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() manually on the error from the probe callback. Fixes: 5bff69b3645d ("ALSA: cs46xx: Allocate resources with device-managed APIs") Cc: Reported-and-tested-by: Jan Engelhardt Link: https://lore.kernel.org/r/p2p1s96o-746-74p4-s95-61qo1p7782pn@vanv.qr Link: https://lore.kernel.org/r/20220705152336.350-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/cs46xx/cs46xx.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/sound/pci/cs46xx/cs46xx.c b/sound/pci/cs46xx/cs46xx.c index bd60308769ff7e..8634004a606b62 100644 --- a/sound/pci/cs46xx/cs46xx.c +++ b/sound/pci/cs46xx/cs46xx.c @@ -74,36 +74,36 @@ static int snd_card_cs46xx_probe(struct pci_dev *pci, err = snd_cs46xx_create(card, pci, external_amp[dev], thinkpad[dev]); if (err < 0) - return err; + goto error; card->private_data = chip; chip->accept_valid = mmap_valid[dev]; err = snd_cs46xx_pcm(chip, 0); if (err < 0) - return err; + goto error; #ifdef CONFIG_SND_CS46XX_NEW_DSP err = snd_cs46xx_pcm_rear(chip, 1); if (err < 0) - return err; + goto error; err = snd_cs46xx_pcm_iec958(chip, 2); if (err < 0) - return err; + goto error; #endif err = snd_cs46xx_mixer(chip, 2); if (err < 0) - return err; + goto error; #ifdef CONFIG_SND_CS46XX_NEW_DSP if (chip->nr_ac97_codecs ==2) { err = snd_cs46xx_pcm_center_lfe(chip, 3); if (err < 0) - return err; + goto error; } #endif err = snd_cs46xx_midi(chip, 0); if (err < 0) - return err; + goto error; err = snd_cs46xx_start_dsp(chip); if (err < 0) - return err; + goto error; snd_cs46xx_gameport(chip); @@ -117,11 +117,15 @@ static int snd_card_cs46xx_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver cs46xx_driver = { From f34f2a18e47b73e48f90a757e1f4aaa8c7d665a1 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 20 May 2022 20:32:39 +0200 Subject: [PATCH 0271/1056] can: bcm: use call_rcu() instead of costly synchronize_rcu() commit f1b4e32aca0811aa011c76e5d6cf2fa19224b386 upstream. In commit d5f9023fa61e ("can: bcm: delay release of struct bcm_op after synchronize_rcu()") Thadeu Lima de Souza Cascardo introduced two synchronize_rcu() calls in bcm_release() (only once at socket close) and in bcm_delete_rx_op() (called on removal of each single bcm_op). Unfortunately this slow removal of the bcm_op's affects user space applications like cansniffer where the modification of a filter removes 2048 bcm_op's which blocks the cansniffer application for 40(!) seconds. In commit 181d4447905d ("can: gw: use call_rcu() instead of costly synchronize_rcu()") Eric Dumazet replaced the synchronize_rcu() calls with several call_rcu()'s to safely remove the data structures after the removal of CAN ID subscriptions with can_rx_unregister() calls. This patch adopts Erics approach for the can-bcm which should be applicable since the removal of tasklet_kill() in bcm_remove_op() and the introduction of the HRTIMER_MODE_SOFT timer handling in Linux 5.4. Fixes: d5f9023fa61e ("can: bcm: delay release of struct bcm_op after synchronize_rcu()") # >= 5.4 Link: https://lore.kernel.org/all/20220520183239.19111-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org Cc: Eric Dumazet Cc: Norbert Slusarek Cc: Thadeu Lima de Souza Cascardo Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/bcm.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 508f67de0b8013..e5ffd2bd62ab81 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -100,6 +100,7 @@ static inline u64 get_u64(const struct canfd_frame *cp, int offset) struct bcm_op { struct list_head list; + struct rcu_head rcu; int ifindex; canid_t can_id; u32 flags; @@ -718,10 +719,9 @@ static struct bcm_op *bcm_find_op(struct list_head *ops, return NULL; } -static void bcm_remove_op(struct bcm_op *op) +static void bcm_free_op_rcu(struct rcu_head *rcu_head) { - hrtimer_cancel(&op->timer); - hrtimer_cancel(&op->thrtimer); + struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu); if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); @@ -732,6 +732,14 @@ static void bcm_remove_op(struct bcm_op *op) kfree(op); } +static void bcm_remove_op(struct bcm_op *op) +{ + hrtimer_cancel(&op->timer); + hrtimer_cancel(&op->thrtimer); + + call_rcu(&op->rcu, bcm_free_op_rcu); +} + static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op) { if (op->rx_reg_dev == dev) { @@ -757,6 +765,9 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) && (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) { + /* disable automatic timer on frame reception */ + op->flags |= RX_NO_AUTOTIMER; + /* * Don't care if we're bound or not (due to netdev * problems) can_rx_unregister() is always a save @@ -785,7 +796,6 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, bcm_rx_handler, op); list_del(&op->list); - synchronize_rcu(); bcm_remove_op(op); return 1; /* done */ } From 8cfa1a33b0fba49515cf1db6b039e0cf3cbce638 Mon Sep 17 00:00:00 2001 From: Liang He Date: Sun, 19 Jun 2022 15:02:57 +0800 Subject: [PATCH 0272/1056] can: grcan: grcan_probe(): remove extra of_node_get() commit 562fed945ea482833667f85496eeda766d511386 upstream. In grcan_probe(), of_find_node_by_path() has already increased the refcount. There is no need to call of_node_get() again, so remove it. Link: https://lore.kernel.org/all/20220619070257.4067022-1-windhl@126.com Fixes: 1e93ed26acf0 ("can: grcan: grcan_probe(): fix broken system id check for errata workaround needs") Cc: stable@vger.kernel.org # v5.18 Cc: Andreas Larsson Signed-off-by: Liang He Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/grcan.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index daee3652ac8b33..e098f594a7492d 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -1659,7 +1659,6 @@ static int grcan_probe(struct platform_device *ofdev) */ sysid_parent = of_find_node_by_path("/ambapp0"); if (sysid_parent) { - of_node_get(sysid_parent); err = of_property_read_u32(sysid_parent, "systemid", &sysid); if (!err && ((sysid & GRLIB_VERSION_MASK) >= GRCAN_TXBUG_SAFE_GRLIB_VERSION)) From 0e60230bc64355c80abe993d1719fdb318094e20 Mon Sep 17 00:00:00 2001 From: Rhett Aultman Date: Sun, 3 Jul 2022 19:33:06 +0200 Subject: [PATCH 0273/1056] can: gs_usb: gs_usb_open/close(): fix memory leak commit 2bda24ef95c0311ab93bda00db40486acf30bd0a upstream. The gs_usb driver appears to suffer from a malady common to many USB CAN adapter drivers in that it performs usb_alloc_coherent() to allocate a number of USB request blocks (URBs) for RX, and then later relies on usb_kill_anchored_urbs() to free them, but this doesn't actually free them. As a result, this may be leaking DMA memory that's been used by the driver. This commit is an adaptation of the techniques found in the esd_usb2 driver where a similar design pattern led to a memory leak. It explicitly frees the RX URBs and their DMA memory via a call to usb_free_coherent(). Since the RX URBs were allocated in the gs_can_open(), we remove them in gs_can_close() rather than in the disconnect function as was done in esd_usb2. For more information, see the 928150fad41b ("can: esd_usb2: fix memory leak"). Link: https://lore.kernel.org/all/alpine.DEB.2.22.394.2206031547001.1630869@thelappy Fixes: d08e973a77d1 ("can: gs_usb: Added support for the GS_USB CAN devices") Cc: stable@vger.kernel.org Signed-off-by: Rhett Aultman Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/gs_usb.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index d68d698f2606b6..e26b3d6f5b482c 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -185,6 +185,8 @@ struct gs_can { struct usb_anchor tx_submitted; atomic_t active_tx_urbs; + void *rxbuf[GS_MAX_RX_URBS]; + dma_addr_t rxbuf_dma[GS_MAX_RX_URBS]; }; /* usb interface struct */ @@ -594,6 +596,7 @@ static int gs_can_open(struct net_device *netdev) for (i = 0; i < GS_MAX_RX_URBS; i++) { struct urb *urb; u8 *buf; + dma_addr_t buf_dma; /* alloc rx urb */ urb = usb_alloc_urb(0, GFP_KERNEL); @@ -604,7 +607,7 @@ static int gs_can_open(struct net_device *netdev) buf = usb_alloc_coherent(dev->udev, sizeof(struct gs_host_frame), GFP_KERNEL, - &urb->transfer_dma); + &buf_dma); if (!buf) { netdev_err(netdev, "No memory left for USB buffer\n"); @@ -612,6 +615,8 @@ static int gs_can_open(struct net_device *netdev) return -ENOMEM; } + urb->transfer_dma = buf_dma; + /* fill, anchor, and submit rx urb */ usb_fill_bulk_urb(urb, dev->udev, @@ -635,10 +640,17 @@ static int gs_can_open(struct net_device *netdev) rc); usb_unanchor_urb(urb); + usb_free_coherent(dev->udev, + sizeof(struct gs_host_frame), + buf, + buf_dma); usb_free_urb(urb); break; } + dev->rxbuf[i] = buf; + dev->rxbuf_dma[i] = buf_dma; + /* Drop reference, * USB core will take care of freeing it */ @@ -703,13 +715,20 @@ static int gs_can_close(struct net_device *netdev) int rc; struct gs_can *dev = netdev_priv(netdev); struct gs_usb *parent = dev->parent; + unsigned int i; netif_stop_queue(netdev); /* Stop polling */ parent->active_channels--; - if (!parent->active_channels) + if (!parent->active_channels) { usb_kill_anchored_urbs(&parent->rx_submitted); + for (i = 0; i < GS_MAX_RX_URBS; i++) + usb_free_coherent(dev->udev, + sizeof(struct gs_host_frame), + dev->rxbuf[i], + dev->rxbuf_dma[i]); + } /* Stop sending URBs */ usb_kill_anchored_urbs(&dev->tx_submitted); From f4d90e9c95d4c8642e4f39d1f152a5a0ec902df6 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 23 May 2022 17:18:33 +0200 Subject: [PATCH 0274/1056] can: m_can: m_can_chip_config(): actually enable internal timestamping commit 5b12933de4e76ec164031c18ce8e0904abf530d7 upstream. In commit df06fd678260 ("can: m_can: m_can_chip_config(): enable and configure internal timestamps") the timestamping in the m_can core should be enabled. In peripheral mode, the RX'ed CAN frames, TX compete frames and error events are sorted by the timestamp. The above mentioned commit however forgot to enable the timestamping. Add the missing bits to enable the timestamp counter to the write of the Timestamp Counter Configuration register. Link: https://lore.kernel.org/all/20220612212708.4081756-1-mkl@pengutronix.de Fixes: df06fd678260 ("can: m_can: m_can_chip_config(): enable and configure internal timestamps") Cc: # 5.13 Cc: Torin Cooper-Bennun Reviewed-by: Chandrasekar Ramakrishnan Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/m_can/m_can.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index bff33a619acde0..9a35e193d8ae61 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1367,7 +1367,9 @@ static void m_can_chip_config(struct net_device *dev) /* enable internal timestamp generation, with a prescalar of 16. The * prescalar is applied to the nominal bit timing */ - m_can_write(cdev, M_CAN_TSCC, FIELD_PREP(TSCC_TCP_MASK, 0xf)); + m_can_write(cdev, M_CAN_TSCC, + FIELD_PREP(TSCC_TCP_MASK, 0xf) | + FIELD_PREP(TSCC_TSS_MASK, TSCC_TSS_INTERNAL)); m_can_config_endisable(cdev, false); From c7333f79888497bfd75dcd02a94eaf836dd1042c Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 30 May 2022 19:30:28 +0200 Subject: [PATCH 0275/1056] can: m_can: m_can_{read_fifo,echo_tx_event}(): shift timestamp to full 32 bits commit 4c3333693f07313f5f0145a922f14a7d3c0f4f21 upstream. In commit 1be37d3b0414 ("can: m_can: fix periph RX path: use rx-offload to ensure skbs are sent from softirq context") the RX path for peripheral devices was switched to RX-offload. Received CAN frames are pushed to RX-offload together with a timestamp. RX-offload is designed to handle overflows of the timestamp correctly, if 32 bit timestamps are provided. The timestamps of m_can core are only 16 bits wide. So this patch shifts them to full 32 bit before passing them to RX-offload. Link: https://lore.kernel.org/all/20220612211410.4081390-1-mkl@pengutronix.de Fixes: 1be37d3b0414 ("can: m_can: fix periph RX path: use rx-offload to ensure skbs are sent from softirq context") Cc: # 5.13 Cc: Torin Cooper-Bennun Reviewed-by: Chandrasekar Ramakrishnan Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/m_can/m_can.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 9a35e193d8ae61..c4596fbe6d2f85 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -532,7 +532,7 @@ static int m_can_read_fifo(struct net_device *dev, u32 rxfs) stats->rx_packets++; stats->rx_bytes += cf->len; - timestamp = FIELD_GET(RX_BUF_RXTS_MASK, fifo_header.dlc); + timestamp = FIELD_GET(RX_BUF_RXTS_MASK, fifo_header.dlc) << 16; m_can_receive_skb(cdev, skb, timestamp); @@ -1043,7 +1043,7 @@ static int m_can_echo_tx_event(struct net_device *dev) } msg_mark = FIELD_GET(TX_EVENT_MM_MASK, txe); - timestamp = FIELD_GET(TX_EVENT_TXTS_MASK, txe); + timestamp = FIELD_GET(TX_EVENT_TXTS_MASK, txe) << 16; /* ack txe element */ m_can_write(cdev, M_CAN_TXEFA, FIELD_PREP(TXEFA_EFAI_MASK, From 0cab3fb917c579aa2c4b99df25b97bd76eb187fa Mon Sep 17 00:00:00 2001 From: Thomas Kopp Date: Tue, 21 Dec 2021 22:24:52 +0000 Subject: [PATCH 0276/1056] can: mcp251xfd: mcp251xfd_regmap_crc_read(): improve workaround handling for mcp2517fd commit 406cc9cdb3e8d644b15e8028948f091b82abdbca upstream. The mcp251xfd compatible chips have an erratum ([1], [2]), where the received CRC doesn't match the calculated CRC. In commit c7eb923c3caf ("can: mcp251xfd: mcp251xfd_regmap_crc_read(): work around broken CRC on TBC register") the following workaround was implementierend. - If a CRC read error on the TBC register is detected and the first byte is 0x00 or 0x80, the most significant bit of the first byte is flipped and the CRC is calculated again. - If the CRC now matches, the _original_ data is passed to the reader. For now we assume transferred data was OK. Measurements on the mcp2517fd show that the workaround is applicable not only of the lowest byte is 0x00 or 0x80, but also if 3 least significant bits are set. Update check on 1st data byte and workaround description accordingly. [1] mcp2517fd: DS80000792C: "Incorrect CRC for certain READ_CRC commands" [2] mcp2518fd: DS80000789C: "Incorrect CRC for certain READ_CRC commands" Link: https://lore.kernel.org/all/DM4PR11MB53901D49578FE265B239E55AFB7C9@DM4PR11MB5390.namprd11.prod.outlook.com Fixes: c7eb923c3caf ("can: mcp251xfd: mcp251xfd_regmap_crc_read(): work around broken CRC on TBC register") Cc: stable@vger.kernel.org Reported-by: Pavel Modilaynen Signed-off-by: Thomas Kopp [mkl: split into 2 patches, update patch description and documentation] Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c index 297491516a265d..299aae08c7b88b 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c @@ -325,10 +325,12 @@ mcp251xfd_regmap_crc_read(void *context, * register. It increments once per SYS clock tick, * which is 20 or 40 MHz. * - * Observation shows that if the lowest byte (which is - * transferred first on the SPI bus) of that register - * is 0x00 or 0x80 the calculated CRC doesn't always - * match the transferred one. + * Observation on the mcp2518fd shows that if the + * lowest byte (which is transferred first on the SPI + * bus) of that register is 0x00 or 0x80 the + * calculated CRC doesn't always match the transferred + * one. On the mcp2517fd this problem is not limited + * to the first byte being 0x00 or 0x80. * * If the highest bit in the lowest byte is flipped * the transferred CRC matches the calculated one. We @@ -337,7 +339,8 @@ mcp251xfd_regmap_crc_read(void *context, * correct. */ if (reg == MCP251XFD_REG_TBC && - (buf_rx->data[0] == 0x0 || buf_rx->data[0] == 0x80)) { + ((buf_rx->data[0] & 0xf8) == 0x0 || + (buf_rx->data[0] & 0xf8) == 0x80)) { /* Flip highest bit in lowest byte of le32 */ buf_rx->data[0] ^= 0x80; From f7c9b38cc5a249abb24e5bf51fda7d7c62d1e11c Mon Sep 17 00:00:00 2001 From: Thomas Kopp Date: Tue, 21 Dec 2021 22:24:52 +0000 Subject: [PATCH 0277/1056] can: mcp251xfd: mcp251xfd_regmap_crc_read(): update workaround broken CRC on TBC register commit e3d4ee7d5f7f5256dfe89219afcc7a2d553b731f upstream. The mcp251xfd compatible chips have an erratum ([1], [2]), where the received CRC doesn't match the calculated CRC. In commit c7eb923c3caf ("can: mcp251xfd: mcp251xfd_regmap_crc_read(): work around broken CRC on TBC register") the following workaround was implementierend. - If a CRC read error on the TBC register is detected and the first byte is 0x00 or 0x80, the most significant bit of the first byte is flipped and the CRC is calculated again. - If the CRC now matches, the _original_ data is passed to the reader. For now we assume transferred data was OK. New investigations and simulations indicate that the CRC send by the device is calculated on correct data, and the data is incorrectly received by the SPI host controller. Use flipped instead of original data and update workaround description in mcp251xfd_regmap_crc_read(). [1] mcp2517fd: DS80000792C: "Incorrect CRC for certain READ_CRC commands" [2] mcp2518fd: DS80000789C: "Incorrect CRC for certain READ_CRC commands" Link: https://lore.kernel.org/all/DM4PR11MB53901D49578FE265B239E55AFB7C9@DM4PR11MB5390.namprd11.prod.outlook.com Fixes: c7eb923c3caf ("can: mcp251xfd: mcp251xfd_regmap_crc_read(): work around broken CRC on TBC register") Cc: stable@vger.kernel.org Signed-off-by: Thomas Kopp [mkl: split into 2 patches, update patch description and documentation] Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c index 299aae08c7b88b..bb559663a3fa5f 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c @@ -334,9 +334,8 @@ mcp251xfd_regmap_crc_read(void *context, * * If the highest bit in the lowest byte is flipped * the transferred CRC matches the calculated one. We - * assume for now the CRC calculation in the chip - * works on wrong data and the transferred data is - * correct. + * assume for now the CRC operates on the correct + * data. */ if (reg == MCP251XFD_REG_TBC && ((buf_rx->data[0] & 0xf8) == 0x0 || @@ -350,10 +349,8 @@ mcp251xfd_regmap_crc_read(void *context, val_len); if (!err) { /* If CRC is now correct, assume - * transferred data was OK, flip bit - * back to original value. + * flipped data is OK. */ - buf_rx->data[0] ^= 0x80; goto out; } } From a703cbdd791b5a1fec6e378e897f3027dff3c313 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 1 Jul 2022 14:47:24 +0200 Subject: [PATCH 0278/1056] bpf: Fix incorrect verifier simulation around jmp32's jeq/jne commit a12ca6277eca6aeeccf66e840c23a2b520e24c8f upstream. Kuee reported a quirk in the jmp32's jeq/jne simulation, namely that the register value does not match expectations for the fall-through path. For example: Before fix: 0: R1=ctx(off=0,imm=0) R10=fp0 0: (b7) r2 = 0 ; R2_w=P0 1: (b7) r6 = 563 ; R6_w=P563 2: (87) r2 = -r2 ; R2_w=Pscalar() 3: (87) r2 = -r2 ; R2_w=Pscalar() 4: (4c) w2 |= w6 ; R2_w=Pscalar(umin=563,umax=4294967295,var_off=(0x233; 0xfffffdcc),s32_min=-2147483085) R6_w=P563 5: (56) if w2 != 0x8 goto pc+1 ; R2_w=P571 <--- [*] 6: (95) exit R0 !read_ok After fix: 0: R1=ctx(off=0,imm=0) R10=fp0 0: (b7) r2 = 0 ; R2_w=P0 1: (b7) r6 = 563 ; R6_w=P563 2: (87) r2 = -r2 ; R2_w=Pscalar() 3: (87) r2 = -r2 ; R2_w=Pscalar() 4: (4c) w2 |= w6 ; R2_w=Pscalar(umin=563,umax=4294967295,var_off=(0x233; 0xfffffdcc),s32_min=-2147483085) R6_w=P563 5: (56) if w2 != 0x8 goto pc+1 ; R2_w=P8 <--- [*] 6: (95) exit R0 !read_ok As can be seen on line 5 for the branch fall-through path in R2 [*] is that given condition w2 != 0x8 is false, verifier should conclude that r2 = 8 as upper 32 bit are known to be zero. However, verifier incorrectly concludes that r2 = 571 which is far off. The problem is it only marks false{true}_reg as known in the switch for JE/NE case, but at the end of the function, it uses {false,true}_{64,32}off to update {false,true}_reg->var_off and they still hold the prior value of {false,true}_reg->var_off before it got marked as known. The subsequent __reg_combine_32_into_64() then propagates this old var_off and derives new bounds. The information between min/max bounds on {false,true}_reg from setting the register to known const combined with the {false,true}_reg->var_off based on the old information then derives wrong register data. Fix it by detangling the BPF_JEQ/BPF_JNE cases and updating relevant {false,true}_{64,32}off tnums along with the register marking to known constant. Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") Reported-by: Kuee K1r0a Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20220701124727.11153-1-daniel@iogearbox.net Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 25ee8d9572c6fb..3e2c924be7b2bb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8591,26 +8591,33 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, return; switch (opcode) { + /* JEQ/JNE comparison doesn't change the register equivalence. + * + * r1 = r2; + * if (r1 == 42) goto label; + * ... + * label: // here both r1 and r2 are known to be 42. + * + * Hence when marking register as known preserve it's ID. + */ case BPF_JEQ: + if (is_jmp32) { + __mark_reg32_known(true_reg, val32); + true_32off = tnum_subreg(true_reg->var_off); + } else { + ___mark_reg_known(true_reg, val); + true_64off = true_reg->var_off; + } + break; case BPF_JNE: - { - struct bpf_reg_state *reg = - opcode == BPF_JEQ ? true_reg : false_reg; - - /* JEQ/JNE comparison doesn't change the register equivalence. - * r1 = r2; - * if (r1 == 42) goto label; - * ... - * label: // here both r1 and r2 are known to be 42. - * - * Hence when marking register as known preserve it's ID. - */ - if (is_jmp32) - __mark_reg32_known(reg, val32); - else - ___mark_reg_known(reg, val); + if (is_jmp32) { + __mark_reg32_known(false_reg, val32); + false_32off = tnum_subreg(false_reg->var_off); + } else { + ___mark_reg_known(false_reg, val); + false_64off = false_reg->var_off; + } break; - } case BPF_JSET: if (is_jmp32) { false_32off = tnum_and(false_32off, tnum_const(~val32)); From a7de8d436db92bab8b1f44624297c2554a6ac36b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 1 Jul 2022 14:47:25 +0200 Subject: [PATCH 0279/1056] bpf: Fix insufficient bounds propagation from adjust_scalar_min_max_vals commit 3844d153a41adea718202c10ae91dc96b37453b5 upstream. Kuee reported a corner case where the tnum becomes constant after the call to __reg_bound_offset(), but the register's bounds are not, that is, its min bounds are still not equal to the register's max bounds. This in turn allows to leak pointers through turning a pointer register as is into an unknown scalar via adjust_ptr_min_max_vals(). Before: func#0 @0 0: R1=ctx(off=0,imm=0,umax=0,var_off=(0x0; 0x0)) R10=fp(off=0,imm=0,umax=0,var_off=(0x0; 0x0)) 0: (b7) r0 = 1 ; R0_w=scalar(imm=1,umin=1,umax=1,var_off=(0x1; 0x0)) 1: (b7) r3 = 0 ; R3_w=scalar(imm=0,umax=0,var_off=(0x0; 0x0)) 2: (87) r3 = -r3 ; R3_w=scalar() 3: (87) r3 = -r3 ; R3_w=scalar() 4: (47) r3 |= 32767 ; R3_w=scalar(smin=-9223372036854743041,umin=32767,var_off=(0x7fff; 0xffffffffffff8000),s32_min=-2147450881) 5: (75) if r3 s>= 0x0 goto pc+1 ; R3_w=scalar(umin=9223372036854808575,var_off=(0x8000000000007fff; 0x7fffffffffff8000),s32_min=-2147450881,u32_min=32767) 6: (95) exit from 5 to 7: R0=scalar(imm=1,umin=1,umax=1,var_off=(0x1; 0x0)) R1=ctx(off=0,imm=0,umax=0,var_off=(0x0; 0x0)) R3=scalar(umin=32767,umax=9223372036854775807,var_off=(0x7fff; 0x7fffffffffff8000),s32_min=-2147450881) R10=fp(off=0,imm=0,umax=0,var_off=(0x0; 0x0)) 7: (d5) if r3 s<= 0x8000 goto pc+1 ; R3=scalar(umin=32769,umax=9223372036854775807,var_off=(0x7fff; 0x7fffffffffff8000),s32_min=-2147450881,u32_min=32767) 8: (95) exit from 7 to 9: R0=scalar(imm=1,umin=1,umax=1,var_off=(0x1; 0x0)) R1=ctx(off=0,imm=0,umax=0,var_off=(0x0; 0x0)) R3=scalar(umin=32767,umax=32768,var_off=(0x7fff; 0x8000)) R10=fp(off=0,imm=0,umax=0,var_off=(0x0; 0x0)) 9: (07) r3 += -32767 ; R3_w=scalar(imm=0,umax=1,var_off=(0x0; 0x0)) <--- [*] 10: (95) exit What can be seen here is that R3=scalar(umin=32767,umax=32768,var_off=(0x7fff; 0x8000)) after the operation R3 += -32767 results in a 'malformed' constant, that is, R3_w=scalar(imm=0,umax=1,var_off=(0x0; 0x0)). Intersecting with var_off has not been done at that point via __update_reg_bounds(), which would have improved the umax to be equal to umin. Refactor the tnum <> min/max bounds information flow into a reg_bounds_sync() helper and use it consistently everywhere. After the fix, bounds have been corrected to R3_w=scalar(imm=0,umax=0,var_off=(0x0; 0x0)) and thus the register is regarded as a 'proper' constant scalar of 0. After: func#0 @0 0: R1=ctx(off=0,imm=0,umax=0,var_off=(0x0; 0x0)) R10=fp(off=0,imm=0,umax=0,var_off=(0x0; 0x0)) 0: (b7) r0 = 1 ; R0_w=scalar(imm=1,umin=1,umax=1,var_off=(0x1; 0x0)) 1: (b7) r3 = 0 ; R3_w=scalar(imm=0,umax=0,var_off=(0x0; 0x0)) 2: (87) r3 = -r3 ; R3_w=scalar() 3: (87) r3 = -r3 ; R3_w=scalar() 4: (47) r3 |= 32767 ; R3_w=scalar(smin=-9223372036854743041,umin=32767,var_off=(0x7fff; 0xffffffffffff8000),s32_min=-2147450881) 5: (75) if r3 s>= 0x0 goto pc+1 ; R3_w=scalar(umin=9223372036854808575,var_off=(0x8000000000007fff; 0x7fffffffffff8000),s32_min=-2147450881,u32_min=32767) 6: (95) exit from 5 to 7: R0=scalar(imm=1,umin=1,umax=1,var_off=(0x1; 0x0)) R1=ctx(off=0,imm=0,umax=0,var_off=(0x0; 0x0)) R3=scalar(umin=32767,umax=9223372036854775807,var_off=(0x7fff; 0x7fffffffffff8000),s32_min=-2147450881) R10=fp(off=0,imm=0,umax=0,var_off=(0x0; 0x0)) 7: (d5) if r3 s<= 0x8000 goto pc+1 ; R3=scalar(umin=32769,umax=9223372036854775807,var_off=(0x7fff; 0x7fffffffffff8000),s32_min=-2147450881,u32_min=32767) 8: (95) exit from 7 to 9: R0=scalar(imm=1,umin=1,umax=1,var_off=(0x1; 0x0)) R1=ctx(off=0,imm=0,umax=0,var_off=(0x0; 0x0)) R3=scalar(umin=32767,umax=32768,var_off=(0x7fff; 0x8000)) R10=fp(off=0,imm=0,umax=0,var_off=(0x0; 0x0)) 9: (07) r3 += -32767 ; R3_w=scalar(imm=0,umax=0,var_off=(0x0; 0x0)) <--- [*] 10: (95) exit Fixes: b03c9f9fdc37 ("bpf/verifier: track signed and unsigned min/max values") Reported-by: Kuee K1r0a Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20220701124727.11153-2-daniel@iogearbox.net Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 72 ++++++++++++++----------------------------- 1 file changed, 23 insertions(+), 49 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3e2c924be7b2bb..b72ed3ae00d847 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1333,6 +1333,21 @@ static void __reg_bound_offset(struct bpf_reg_state *reg) reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off); } +static void reg_bounds_sync(struct bpf_reg_state *reg) +{ + /* We might have learned new bounds from the var_off. */ + __update_reg_bounds(reg); + /* We might have learned something about the sign bit. */ + __reg_deduce_bounds(reg); + /* We might have learned some bits from the bounds. */ + __reg_bound_offset(reg); + /* Intersecting with the old var_off might have improved our bounds + * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), + * then new var_off is (0; 0x7f...fc) which improves our umax. + */ + __update_reg_bounds(reg); +} + static bool __reg32_bound_s64(s32 a) { return a >= 0 && a <= S32_MAX; @@ -1374,16 +1389,8 @@ static void __reg_combine_32_into_64(struct bpf_reg_state *reg) * so they do not impact tnum bounds calculation. */ __mark_reg64_unbounded(reg); - __update_reg_bounds(reg); } - - /* Intersecting with the old var_off might have improved our bounds - * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), - * then new var_off is (0; 0x7f...fc) which improves our umax. - */ - __reg_deduce_bounds(reg); - __reg_bound_offset(reg); - __update_reg_bounds(reg); + reg_bounds_sync(reg); } static bool __reg64_bound_s32(s64 a) @@ -1399,7 +1406,6 @@ static bool __reg64_bound_u32(u64 a) static void __reg_combine_64_into_32(struct bpf_reg_state *reg) { __mark_reg32_unbounded(reg); - if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) { reg->s32_min_value = (s32)reg->smin_value; reg->s32_max_value = (s32)reg->smax_value; @@ -1408,14 +1414,7 @@ static void __reg_combine_64_into_32(struct bpf_reg_state *reg) reg->u32_min_value = (u32)reg->umin_value; reg->u32_max_value = (u32)reg->umax_value; } - - /* Intersecting with the old var_off might have improved our bounds - * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), - * then new var_off is (0; 0x7f...fc) which improves our umax. - */ - __reg_deduce_bounds(reg); - __reg_bound_offset(reg); - __update_reg_bounds(reg); + reg_bounds_sync(reg); } /* Mark a register as having a completely unknown (scalar) value. */ @@ -6054,9 +6053,7 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, ret_reg->s32_max_value = meta->msize_max_value; ret_reg->smin_value = -MAX_ERRNO; ret_reg->s32_min_value = -MAX_ERRNO; - __reg_deduce_bounds(ret_reg); - __reg_bound_offset(ret_reg); - __update_reg_bounds(ret_reg); + reg_bounds_sync(ret_reg); } static int @@ -7216,11 +7213,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) return -EINVAL; - - __update_reg_bounds(dst_reg); - __reg_deduce_bounds(dst_reg); - __reg_bound_offset(dst_reg); - + reg_bounds_sync(dst_reg); if (sanitize_check_bounds(env, insn, dst_reg) < 0) return -EACCES; if (sanitize_needed(opcode)) { @@ -7958,10 +7951,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, /* ALU32 ops are zero extended into 64bit register */ if (alu32) zext_32_to_64(dst_reg); - - __update_reg_bounds(dst_reg); - __reg_deduce_bounds(dst_reg); - __reg_bound_offset(dst_reg); + reg_bounds_sync(dst_reg); return 0; } @@ -8150,10 +8140,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->dst_reg); } zext_32_to_64(dst_reg); - - __update_reg_bounds(dst_reg); - __reg_deduce_bounds(dst_reg); - __reg_bound_offset(dst_reg); + reg_bounds_sync(dst_reg); } } else { /* case: R = imm @@ -8756,21 +8743,8 @@ static void __reg_combine_min_max(struct bpf_reg_state *src_reg, dst_reg->smax_value); src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, dst_reg->var_off); - /* We might have learned new bounds from the var_off. */ - __update_reg_bounds(src_reg); - __update_reg_bounds(dst_reg); - /* We might have learned something about the sign bit. */ - __reg_deduce_bounds(src_reg); - __reg_deduce_bounds(dst_reg); - /* We might have learned some bits from the bounds. */ - __reg_bound_offset(src_reg); - __reg_bound_offset(dst_reg); - /* Intersecting with the old var_off might have improved our bounds - * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), - * then new var_off is (0; 0x7f...fc) which improves our umax. - */ - __update_reg_bounds(src_reg); - __update_reg_bounds(dst_reg); + reg_bounds_sync(src_reg); + reg_bounds_sync(dst_reg); } static void reg_combine_min_max(struct bpf_reg_state *true_src, From db89582ff330556188da856e01382ccbf3a5e706 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 5 Jul 2022 14:53:51 +0200 Subject: [PATCH 0280/1056] usbnet: fix memory leak in error case commit b55a21b764c1e182014630fa5486d717484ac58f upstream. usbnet_write_cmd_async() mixed up which buffers need to be freed in which error case. v2: add Fixes tag v3: fix uninitialized buf pointer Fixes: 877bd862f32b8 ("usbnet: introduce usbnet 3 command helpers") Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20220705125351.17309-1-oneukum@suse.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/usbnet.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index a0ea236ac60e02..71bb0849ce480f 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -2135,7 +2135,7 @@ static void usbnet_async_cmd_cb(struct urb *urb) int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype, u16 value, u16 index, const void *data, u16 size) { - struct usb_ctrlrequest *req = NULL; + struct usb_ctrlrequest *req; struct urb *urb; int err = -ENOMEM; void *buf = NULL; @@ -2153,7 +2153,7 @@ int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype, if (!buf) { netdev_err(dev->net, "Error allocating buffer" " in %s!\n", __func__); - goto fail_free; + goto fail_free_urb; } } @@ -2177,14 +2177,21 @@ int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype, if (err < 0) { netdev_err(dev->net, "Error submitting the control" " message: status=%d\n", err); - goto fail_free; + goto fail_free_all; } return 0; +fail_free_all: + kfree(req); fail_free_buf: kfree(buf); -fail_free: - kfree(req); + /* + * avoid a double free + * needed because the flag can be set only + * after filling the URB + */ + urb->transfer_flags = 0; +fail_free_urb: usb_free_urb(urb); fail: return err; From 75e9009edabcc2a00840168ee83ef881146cffbe Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Tue, 5 Jul 2022 20:56:10 +0800 Subject: [PATCH 0281/1056] net: rose: fix UAF bug caused by rose_t0timer_expiry commit 148ca04518070910739dfc4eeda765057856403d upstream. There are UAF bugs caused by rose_t0timer_expiry(). The root cause is that del_timer() could not stop the timer handler that is running and there is no synchronization. One of the race conditions is shown below: (thread 1) | (thread 2) | rose_device_event | rose_rt_device_down | rose_remove_neigh rose_t0timer_expiry | rose_stop_t0timer(rose_neigh) ... | del_timer(&neigh->t0timer) | kfree(rose_neigh) //[1]FREE neigh->dce_mode //[2]USE | The rose_neigh is deallocated in position [1] and use in position [2]. The crash trace triggered by POC is like below: BUG: KASAN: use-after-free in expire_timers+0x144/0x320 Write of size 8 at addr ffff888009b19658 by task swapper/0/0 ... Call Trace: dump_stack_lvl+0xbf/0xee print_address_description+0x7b/0x440 print_report+0x101/0x230 ? expire_timers+0x144/0x320 kasan_report+0xed/0x120 ? expire_timers+0x144/0x320 expire_timers+0x144/0x320 __run_timers+0x3ff/0x4d0 run_timer_softirq+0x41/0x80 __do_softirq+0x233/0x544 ... This patch changes rose_stop_ftimer() and rose_stop_t0timer() in rose_remove_neigh() to del_timer_sync() in order that the timer handler could be finished before the resources such as rose_neigh and so on are deallocated. As a result, the UAF bugs could be mitigated. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/20220705125610.77971-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/rose/rose_route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index c0e04c261a1563..764a726debb1ff 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -227,8 +227,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) { struct rose_neigh *s; - rose_stop_ftimer(rose_neigh); - rose_stop_t0timer(rose_neigh); + del_timer_sync(&rose_neigh->ftimer); + del_timer_sync(&rose_neigh->t0timer); skb_queue_purge(&rose_neigh->queue); From 5ccecafc728b0df48263d5ac198220bcd79830bc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Jul 2022 04:16:31 +0200 Subject: [PATCH 0282/1056] netfilter: nft_set_pipapo: release elements in clone from abort path commit 9827a0e6e23bf43003cd3d5b7fb11baf59a35e1e upstream. New elements that reside in the clone are not released in case that the transaction is aborted. [16302.231754] ------------[ cut here ]------------ [16302.231756] WARNING: CPU: 0 PID: 100509 at net/netfilter/nf_tables_api.c:1864 nf_tables_chain_destroy+0x26/0x127 [nf_tables] [...] [16302.231882] CPU: 0 PID: 100509 Comm: nft Tainted: G W 5.19.0-rc3+ #155 [...] [16302.231887] RIP: 0010:nf_tables_chain_destroy+0x26/0x127 [nf_tables] [16302.231899] Code: f3 fe ff ff 41 55 41 54 55 53 48 8b 6f 10 48 89 fb 48 c7 c7 82 96 d9 a0 8b 55 50 48 8b 75 58 e8 de f5 92 e0 83 7d 50 00 74 09 <0f> 0b 5b 5d 41 5c 41 5d c3 4c 8b 65 00 48 8b 7d 08 49 39 fc 74 05 [...] [16302.231917] Call Trace: [16302.231919] [16302.231921] __nf_tables_abort.cold+0x23/0x28 [nf_tables] [16302.231934] nf_tables_abort+0x30/0x50 [nf_tables] [16302.231946] nfnetlink_rcv_batch+0x41a/0x840 [nfnetlink] [16302.231952] ? __nla_validate_parse+0x48/0x190 [16302.231959] nfnetlink_rcv+0x110/0x129 [nfnetlink] [16302.231963] netlink_unicast+0x211/0x340 [16302.231969] netlink_sendmsg+0x21e/0x460 Add nft_set_pipapo_match_destroy() helper function to release the elements in the lookup tables. Stefano Brivio says: "We additionally look for elements pointers in the cloned matching data if priv->dirty is set, because that means that cloned data might point to additional elements we did not commit to the working copy yet (such as the abort path case, but perhaps not limited to it)." Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Reviewed-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_set_pipapo.c | 48 +++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 2c8051d8cca69c..4f9299b9dcddc1 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -2124,6 +2124,32 @@ static int nft_pipapo_init(const struct nft_set *set, return err; } +/** + * nft_set_pipapo_match_destroy() - Destroy elements from key mapping array + * @set: nftables API set representation + * @m: matching data pointing to key mapping array + */ +static void nft_set_pipapo_match_destroy(const struct nft_set *set, + struct nft_pipapo_match *m) +{ + struct nft_pipapo_field *f; + int i, r; + + for (i = 0, f = m->f; i < m->field_count - 1; i++, f++) + ; + + for (r = 0; r < f->rules; r++) { + struct nft_pipapo_elem *e; + + if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e) + continue; + + e = f->mt[r].e; + + nft_set_elem_destroy(set, e, true); + } +} + /** * nft_pipapo_destroy() - Free private data for set and all committed elements * @set: nftables API set representation @@ -2132,26 +2158,13 @@ static void nft_pipapo_destroy(const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m; - struct nft_pipapo_field *f; - int i, r, cpu; + int cpu; m = rcu_dereference_protected(priv->match, true); if (m) { rcu_barrier(); - for (i = 0, f = m->f; i < m->field_count - 1; i++, f++) - ; - - for (r = 0; r < f->rules; r++) { - struct nft_pipapo_elem *e; - - if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e) - continue; - - e = f->mt[r].e; - - nft_set_elem_destroy(set, e, true); - } + nft_set_pipapo_match_destroy(set, m); #ifdef NFT_PIPAPO_ALIGN free_percpu(m->scratch_aligned); @@ -2165,6 +2178,11 @@ static void nft_pipapo_destroy(const struct nft_set *set) } if (priv->clone) { + m = priv->clone; + + if (priv->dirty) + nft_set_pipapo_match_destroy(set, m); + #ifdef NFT_PIPAPO_ALIGN free_percpu(priv->clone->scratch_aligned); #endif From c1784d2075138992b00c17ab4ffc6d855171fe6d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Jul 2022 04:16:30 +0200 Subject: [PATCH 0283/1056] netfilter: nf_tables: stricter validation of element data commit 7e6bc1f6cabcd30aba0b11219d8e01b952eacbb6 upstream. Make sure element data type and length do not mismatch the one specified by the set declaration. Fixes: 7d7402642eaf ("netfilter: nf_tables: variable sized set element keys / data") Reported-by: Hugues ANGUELKOV Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1b4bc588f8d63d..65d96439e2be70 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5118,13 +5118,20 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set, struct nft_data *data, struct nlattr *attr) { + u32 dtype; int err; err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr); if (err < 0) return err; - if (desc->type != NFT_DATA_VERDICT && desc->len != set->dlen) { + if (set->dtype == NFT_DATA_VERDICT) + dtype = NFT_DATA_VERDICT; + else + dtype = NFT_DATA_VALUE; + + if (dtype != desc->type || + set->dlen != desc->len) { nft_data_release(data, desc->type); return -EINVAL; } From bb5c247155016f0d94e0fcb318790c8c4a3e624f Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 18 Aug 2021 13:41:19 +0300 Subject: [PATCH 0284/1056] btrfs: rename btrfs_alloc_chunk to btrfs_create_chunk [ Upstream commit f6f39f7a0add4e7fd120a709545b57586a1d0393 ] The user facing function used to allocate new chunks is btrfs_chunk_alloc, unfortunately there is yet another similar sounding function - btrfs_alloc_chunk. This creates confusion, especially since the latter function can be considered "private" in the sense that it implements the first stage of chunk creation and as such is called by btrfs_chunk_alloc. To avoid the awkwardness that comes with having similarly named but distinctly different in their purpose function rename btrfs_alloc_chunk to btrfs_create_chunk, given that the main purpose of this function is to orchestrate the whole process of allocating a chunk - reserving space into devices, deciding on characteristics of the stripe size and creating the in-memory structures. Reviewed-by: Filipe Manana Reviewed-by: Anand Jain Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/block-group.c | 6 +++--- fs/btrfs/volumes.c | 10 +++++----- fs/btrfs/volumes.h | 2 +- fs/btrfs/zoned.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 37418b183b0788..aadc1203ad885e 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3400,7 +3400,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) */ check_system_chunk(trans, flags); - bg = btrfs_alloc_chunk(trans, flags); + bg = btrfs_create_chunk(trans, flags); if (IS_ERR(bg)) { ret = PTR_ERR(bg); goto out; @@ -3461,7 +3461,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) const u64 sys_flags = btrfs_system_alloc_profile(trans->fs_info); struct btrfs_block_group *sys_bg; - sys_bg = btrfs_alloc_chunk(trans, sys_flags); + sys_bg = btrfs_create_chunk(trans, sys_flags); if (IS_ERR(sys_bg)) { ret = PTR_ERR(sys_bg); btrfs_abort_transaction(trans, ret); @@ -3754,7 +3754,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) * could deadlock on an extent buffer since our caller may be * COWing an extent buffer from the chunk btree. */ - bg = btrfs_alloc_chunk(trans, flags); + bg = btrfs_create_chunk(trans, flags); if (IS_ERR(bg)) { ret = PTR_ERR(bg); } else if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 378e03a93e101a..b75ce79a2540cb 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3131,7 +3131,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) const u64 sys_flags = btrfs_system_alloc_profile(fs_info); struct btrfs_block_group *sys_bg; - sys_bg = btrfs_alloc_chunk(trans, sys_flags); + sys_bg = btrfs_create_chunk(trans, sys_flags); if (IS_ERR(sys_bg)) { ret = PTR_ERR(sys_bg); btrfs_abort_transaction(trans, ret); @@ -5010,7 +5010,7 @@ static void check_raid1c34_incompat_flag(struct btrfs_fs_info *info, u64 type) } /* - * Structure used internally for __btrfs_alloc_chunk() function. + * Structure used internally for btrfs_create_chunk() function. * Wraps needed parameters. */ struct alloc_chunk_ctl { @@ -5414,7 +5414,7 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans, return block_group; } -struct btrfs_block_group *btrfs_alloc_chunk(struct btrfs_trans_handle *trans, +struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans, u64 type) { struct btrfs_fs_info *info = trans->fs_info; @@ -5615,12 +5615,12 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans) */ alloc_profile = btrfs_metadata_alloc_profile(fs_info); - meta_bg = btrfs_alloc_chunk(trans, alloc_profile); + meta_bg = btrfs_create_chunk(trans, alloc_profile); if (IS_ERR(meta_bg)) return PTR_ERR(meta_bg); alloc_profile = btrfs_system_alloc_profile(fs_info); - sys_bg = btrfs_alloc_chunk(trans, alloc_profile); + sys_bg = btrfs_create_chunk(trans, alloc_profile); if (IS_ERR(sys_bg)) return PTR_ERR(sys_bg); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 4db10d071d67f9..d86a6f9f166cd8 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -454,7 +454,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *map, struct btrfs_io_geometry *io_geom); int btrfs_read_sys_array(struct btrfs_fs_info *fs_info); int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info); -struct btrfs_block_group *btrfs_alloc_chunk(struct btrfs_trans_handle *trans, +struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans, u64 type); void btrfs_mapping_tree_free(struct extent_map_tree *tree); blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 596b2148807d48..3bc2f92cd197ad 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -636,7 +636,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) /* * stripe_size is always aligned to BTRFS_STRIPE_LEN in - * __btrfs_alloc_chunk(). Since we want stripe_len == zone_size, + * btrfs_create_chunk(). Since we want stripe_len == zone_size, * check the alignment here. */ if (!IS_ALIGNED(zone_size, BTRFS_STRIPE_LEN)) { From 6618205047176275331cfa7d539d9080377beeb5 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 12 Oct 2021 11:21:35 +0300 Subject: [PATCH 0285/1056] btrfs: add additional parameters to btrfs_init_tree_ref/btrfs_init_data_ref [ Upstream commit f42c5da6c12e990d8ec415199600b4d593c63bf5 ] In order to make 'real_root' used only in ref-verify it's required to have the necessary context to perform the same checks that this member is used for. So add 'mod_root' which will contain the root on behalf of which a delayed ref was created and a 'skip_group' parameter which will contain callsite-specific override of skip_qgroup. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/delayed-ref.h | 5 +++-- fs/btrfs/extent-tree.c | 17 +++++++++++------ fs/btrfs/file.c | 13 ++++++++----- fs/btrfs/inode.c | 3 ++- fs/btrfs/relocation.c | 21 ++++++++++++++------- fs/btrfs/tree-log.c | 2 +- 6 files changed, 39 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index e22fba272e4fdf..31266ba1d4300b 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -271,7 +271,7 @@ static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref, } static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, - int level, u64 root) + int level, u64 root, u64 mod_root, bool skip_qgroup) { /* If @real_root not set, use @root as fallback */ if (!generic_ref->real_root) @@ -282,7 +282,8 @@ static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, } static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref, - u64 ref_root, u64 ino, u64 offset) + u64 ref_root, u64 ino, u64 offset, u64 mod_root, + bool skip_qgroup) { /* If @real_root not set, use @root as fallback */ if (!generic_ref->real_root) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 514adc83577f0f..e01b9344fb9c59 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2440,7 +2440,8 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, num_bytes, parent); generic_ref.real_root = root->root_key.objectid; btrfs_init_data_ref(&generic_ref, ref_root, key.objectid, - key.offset); + key.offset, root->root_key.objectid, + for_reloc); generic_ref.skip_qgroup = for_reloc; if (inc) ret = btrfs_inc_extent_ref(trans, &generic_ref); @@ -2454,7 +2455,8 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&generic_ref, action, bytenr, num_bytes, parent); generic_ref.real_root = root->root_key.objectid; - btrfs_init_tree_ref(&generic_ref, level - 1, ref_root); + btrfs_init_tree_ref(&generic_ref, level - 1, ref_root, + root->root_key.objectid, for_reloc); generic_ref.skip_qgroup = for_reloc; if (inc) ret = btrfs_inc_extent_ref(trans, &generic_ref); @@ -3289,7 +3291,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF, buf->start, buf->len, parent); btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf), - root->root_key.objectid); + root->root_key.objectid, 0, false); if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { btrfs_ref_tree_mod(fs_info, &generic_ref); @@ -4705,7 +4707,8 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT, ins->objectid, ins->offset, 0); - btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset); + btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, + offset, 0, false); btrfs_ref_tree_mod(root->fs_info, &generic_ref); return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes); @@ -4898,7 +4901,8 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT, ins.objectid, ins.offset, parent); generic_ref.real_root = root->root_key.objectid; - btrfs_init_tree_ref(&generic_ref, level, root_objectid); + btrfs_init_tree_ref(&generic_ref, level, root_objectid, + root->root_key.objectid, false); btrfs_ref_tree_mod(fs_info, &generic_ref); ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op); if (ret) @@ -5315,7 +5319,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, fs_info->nodesize, parent); - btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid); + btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid, + 0, false); ret = btrfs_free_extent(trans, &ref); if (ret) goto out_unlock; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a06c8366a8f4e1..1c597cd6c02475 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -869,7 +869,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, btrfs_init_data_ref(&ref, root->root_key.objectid, new_key.objectid, - args->start - extent_offset); + args->start - extent_offset, + 0, false); ret = btrfs_inc_extent_ref(trans, &ref); BUG_ON(ret); /* -ENOMEM */ } @@ -955,7 +956,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, btrfs_init_data_ref(&ref, root->root_key.objectid, key.objectid, - key.offset - extent_offset); + key.offset - extent_offset, 0, + false); ret = btrfs_free_extent(trans, &ref); BUG_ON(ret); /* -ENOMEM */ args->bytes_found += extent_end - key.offset; @@ -1232,7 +1234,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr, num_bytes, 0); btrfs_init_data_ref(&ref, root->root_key.objectid, ino, - orig_offset); + orig_offset, 0, false); ret = btrfs_inc_extent_ref(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); @@ -1257,7 +1259,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, other_end = 0; btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, num_bytes, 0); - btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset); + btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset, + 0, false); if (extent_mergeable(leaf, path->slots[0] + 1, ino, bytenr, orig_offset, &other_start, &other_end)) { @@ -2715,7 +2718,7 @@ static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans, extent_info->disk_len, 0); ref_offset = extent_info->file_offset - extent_info->data_offset; btrfs_init_data_ref(&ref, root->root_key.objectid, - btrfs_ino(inode), ref_offset); + btrfs_ino(inode), ref_offset, 0, false); ret = btrfs_inc_extent_ref(trans, &ref); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 044d584c3467c2..d644dcaf300404 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4919,7 +4919,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, extent_start, extent_num_bytes, 0); ref.real_root = root->root_key.objectid; btrfs_init_data_ref(&ref, btrfs_header_owner(leaf), - ino, extent_offset); + ino, extent_offset, + root->root_key.objectid, false); ret = btrfs_free_extent(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index a6661f2ad2c003..0300770c0a89d8 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1147,7 +1147,8 @@ int replace_file_extents(struct btrfs_trans_handle *trans, num_bytes, parent); ref.real_root = root->root_key.objectid; btrfs_init_data_ref(&ref, btrfs_header_owner(leaf), - key.objectid, key.offset); + key.objectid, key.offset, + root->root_key.objectid, false); ret = btrfs_inc_extent_ref(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); @@ -1158,7 +1159,8 @@ int replace_file_extents(struct btrfs_trans_handle *trans, num_bytes, parent); ref.real_root = root->root_key.objectid; btrfs_init_data_ref(&ref, btrfs_header_owner(leaf), - key.objectid, key.offset); + key.objectid, key.offset, + root->root_key.objectid, false); ret = btrfs_free_extent(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); @@ -1368,7 +1370,8 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc, btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr, blocksize, path->nodes[level]->start); ref.skip_qgroup = true; - btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid); + btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid, + 0, true); ret = btrfs_inc_extent_ref(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); @@ -1377,7 +1380,8 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc, btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr, blocksize, 0); ref.skip_qgroup = true; - btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid); + btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid, 0, + true); ret = btrfs_inc_extent_ref(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); @@ -1386,7 +1390,8 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc, btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr, blocksize, path->nodes[level]->start); - btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid); + btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid, + 0, true); ref.skip_qgroup = true; ret = btrfs_free_extent(trans, &ref); if (ret) { @@ -1396,7 +1401,8 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc, btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr, blocksize, 0); - btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid); + btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid, + 0, true); ref.skip_qgroup = true; ret = btrfs_free_extent(trans, &ref); if (ret) { @@ -2475,7 +2481,8 @@ static int do_relocation(struct btrfs_trans_handle *trans, upper->eb->start); ref.real_root = root->root_key.objectid; btrfs_init_tree_ref(&ref, node->level, - btrfs_header_owner(upper->eb)); + btrfs_header_owner(upper->eb), + root->root_key.objectid, false); ret = btrfs_inc_extent_ref(trans, &ref); if (!ret) ret = btrfs_drop_subtree(trans, root, eb, diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1221d8483d63b7..bed6811476b09f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -761,7 +761,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, ins.objectid, ins.offset, 0); btrfs_init_data_ref(&ref, root->root_key.objectid, - key->objectid, offset); + key->objectid, offset, 0, false); ret = btrfs_inc_extent_ref(trans, &ref); if (ret) goto out; From 9bc53f5a3932c3f67799a0154bbf25261b0ae8a7 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 13 Dec 2021 08:45:12 +0000 Subject: [PATCH 0286/1056] btrfs: fix invalid delayed ref after subvolume creation failure [ Upstream commit 7a1636089acfee7562fe79aff7d1b4c57869896d ] When creating a subvolume, at ioctl.c:create_subvol(), if we fail to insert the new root's root item into the root tree, we are freeing the metadata extent we reserved for the new root to prevent a metadata extent leak, as we don't abort the transaction at that point (since there is nothing at that point that is irreversible). However we allocated the metadata extent for the new root which we are creating for the new subvolume, so its delayed reference refers to the ID of this new root. But when we free the metadata extent we pass the root of the subvolume where the new subvolume is located to btrfs_free_tree_block() - this is incorrect because this will generate a delayed reference that refers to the ID of the parent subvolume's root, and not to ID of the new root. This results in a failure when running delayed references that leads to a transaction abort and a trace like the following: [3868.738042] RIP: 0010:__btrfs_free_extent+0x709/0x950 [btrfs] [3868.739857] Code: 68 0f 85 e6 fb ff (...) [3868.742963] RSP: 0018:ffffb0e9045cf910 EFLAGS: 00010246 [3868.743908] RAX: 00000000fffffffe RBX: 00000000fffffffe RCX: 0000000000000002 [3868.745312] RDX: 00000000fffffffe RSI: 0000000000000002 RDI: ffff90b0cd793b88 [3868.746643] RBP: 000000000e5d8000 R08: 0000000000000000 R09: ffff90b0cd793b88 [3868.747979] R10: 0000000000000002 R11: 00014ded97944d68 R12: 0000000000000000 [3868.749373] R13: ffff90b09afe4a28 R14: 0000000000000000 R15: ffff90b0cd793b88 [3868.750725] FS: 00007f281c4a8b80(0000) GS:ffff90b3ada00000(0000) knlGS:0000000000000000 [3868.752275] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [3868.753515] CR2: 00007f281c6a5000 CR3: 0000000108a42006 CR4: 0000000000370ee0 [3868.754869] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [3868.756228] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [3868.757803] Call Trace: [3868.758281] [3868.758655] ? btrfs_merge_delayed_refs+0x178/0x1c0 [btrfs] [3868.759827] __btrfs_run_delayed_refs+0x2b1/0x1250 [btrfs] [3868.761047] btrfs_run_delayed_refs+0x86/0x210 [btrfs] [3868.762069] ? lock_acquired+0x19f/0x420 [3868.762829] btrfs_commit_transaction+0x69/0xb20 [btrfs] [3868.763860] ? _raw_spin_unlock+0x29/0x40 [3868.764614] ? btrfs_block_rsv_release+0x1c2/0x1e0 [btrfs] [3868.765870] create_subvol+0x1d8/0x9a0 [btrfs] [3868.766766] btrfs_mksubvol+0x447/0x4c0 [btrfs] [3868.767669] ? preempt_count_add+0x49/0xa0 [3868.768444] __btrfs_ioctl_snap_create+0x123/0x190 [btrfs] [3868.769639] ? _copy_from_user+0x66/0xa0 [3868.770391] btrfs_ioctl_snap_create_v2+0xbb/0x140 [btrfs] [3868.771495] btrfs_ioctl+0xd1e/0x35c0 [btrfs] [3868.772364] ? __slab_free+0x10a/0x360 [3868.773198] ? rcu_read_lock_sched_held+0x12/0x60 [3868.774121] ? lock_release+0x223/0x4a0 [3868.774863] ? lock_acquired+0x19f/0x420 [3868.775634] ? rcu_read_lock_sched_held+0x12/0x60 [3868.776530] ? trace_hardirqs_on+0x1b/0xe0 [3868.777373] ? _raw_spin_unlock_irqrestore+0x3e/0x60 [3868.778280] ? kmem_cache_free+0x321/0x3c0 [3868.779011] ? __x64_sys_ioctl+0x83/0xb0 [3868.779718] __x64_sys_ioctl+0x83/0xb0 [3868.780387] do_syscall_64+0x3b/0xc0 [3868.781059] entry_SYSCALL_64_after_hwframe+0x44/0xae [3868.781953] RIP: 0033:0x7f281c59e957 [3868.782585] Code: 3c 1c 48 f7 d8 4c (...) [3868.785867] RSP: 002b:00007ffe1f83e2b8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [3868.787198] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f281c59e957 [3868.788450] RDX: 00007ffe1f83e2c0 RSI: 0000000050009418 RDI: 0000000000000003 [3868.789748] RBP: 00007ffe1f83f300 R08: 0000000000000000 R09: 00007ffe1f83fe36 [3868.791214] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000003 [3868.792468] R13: 0000000000000003 R14: 00007ffe1f83e2c0 R15: 00000000000003cc [3868.793765] [3868.794037] irq event stamp: 0 [3868.794548] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [3868.795670] hardirqs last disabled at (0): [] copy_process+0x934/0x2040 [3868.797086] softirqs last enabled at (0): [] copy_process+0x934/0x2040 [3868.798309] softirqs last disabled at (0): [<0000000000000000>] 0x0 [3868.799284] ---[ end trace be24c7002fe27747 ]--- [3868.799928] BTRFS info (device dm-0): leaf 241188864 gen 1268 total ptrs 214 free space 469 owner 2 [3868.801133] BTRFS info (device dm-0): refs 2 lock_owner 225627 current 225627 [3868.802056] item 0 key (237436928 169 0) itemoff 16250 itemsize 33 [3868.802863] extent refs 1 gen 1265 flags 2 [3868.803447] ref#0: tree block backref root 1610 (...) [3869.064354] item 114 key (241008640 169 0) itemoff 12488 itemsize 33 [3869.065421] extent refs 1 gen 1268 flags 2 [3869.066115] ref#0: tree block backref root 1689 (...) [3869.403834] BTRFS error (device dm-0): unable to find ref byte nr 241008640 parent 0 root 1622 owner 0 offset 0 [3869.405641] BTRFS: error (device dm-0) in __btrfs_free_extent:3076: errno=-2 No such entry [3869.407138] BTRFS: error (device dm-0) in btrfs_run_delayed_refs:2159: errno=-2 No such entry Fix this by passing the new subvolume's root ID to btrfs_free_tree_block(). This requires changing the root argument of btrfs_free_tree_block() from struct btrfs_root * to a u64, since at this point during the subvolume creation we have not yet created the struct btrfs_root for the new subvolume, and btrfs_free_tree_block() only needs a root ID and nothing else from a struct btrfs_root. This was triggered by test case generic/475 from fstests. Fixes: 67addf29004c5b ("btrfs: fix metadata extent leak after failure to create subvolume") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/ctree.c | 17 +++++++++-------- fs/btrfs/ctree.h | 7 ++++++- fs/btrfs/extent-tree.c | 13 +++++++------ fs/btrfs/free-space-tree.c | 4 ++-- fs/btrfs/ioctl.c | 9 +++++---- fs/btrfs/qgroup.c | 3 ++- 6 files changed, 31 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 899f8544592525..341ce90d24b15c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -462,8 +462,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, BUG_ON(ret < 0); rcu_assign_pointer(root->node, cow); - btrfs_free_tree_block(trans, root, buf, parent_start, - last_ref); + btrfs_free_tree_block(trans, btrfs_root_id(root), buf, + parent_start, last_ref); free_extent_buffer(buf); add_root_to_dirty_list(root); } else { @@ -484,8 +484,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, return ret; } } - btrfs_free_tree_block(trans, root, buf, parent_start, - last_ref); + btrfs_free_tree_block(trans, btrfs_root_id(root), buf, + parent_start, last_ref); } if (unlock_orig) btrfs_tree_unlock(buf); @@ -926,7 +926,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, free_extent_buffer(mid); root_sub_used(root, mid->len); - btrfs_free_tree_block(trans, root, mid, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); /* once for the root ptr */ free_extent_buffer_stale(mid); return 0; @@ -985,7 +985,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_tree_unlock(right); del_ptr(root, path, level + 1, pslot + 1); root_sub_used(root, right->len); - btrfs_free_tree_block(trans, root, right, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), right, + 0, 1); free_extent_buffer_stale(right); right = NULL; } else { @@ -1030,7 +1031,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_tree_unlock(mid); del_ptr(root, path, level + 1, pslot); root_sub_used(root, mid->len); - btrfs_free_tree_block(trans, root, mid, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); free_extent_buffer_stale(mid); mid = NULL; } else { @@ -4059,7 +4060,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, root_sub_used(root, leaf->len); atomic_inc(&leaf->refs); - btrfs_free_tree_block(trans, root, leaf, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1); free_extent_buffer_stale(leaf); } /* diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 21c44846b00221..cc72d8981c4785 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2256,6 +2256,11 @@ static inline bool btrfs_root_dead(const struct btrfs_root *root) return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0; } +static inline u64 btrfs_root_id(const struct btrfs_root *root) +{ + return root->root_key.objectid; +} + /* struct btrfs_root_backup */ BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, tree_root, 64); @@ -2718,7 +2723,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, u64 empty_size, enum btrfs_lock_nesting nest); void btrfs_free_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + u64 root_id, struct extent_buffer *buf, u64 parent, int last_ref); int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e01b9344fb9c59..f11616f61dd62c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3280,20 +3280,20 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, } void btrfs_free_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + u64 root_id, struct extent_buffer *buf, u64 parent, int last_ref) { - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_ref generic_ref = { 0 }; int ret; btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF, buf->start, buf->len, parent); btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf), - root->root_key.objectid, 0, false); + root_id, 0, false); - if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { + if (root_id != BTRFS_TREE_LOG_OBJECTID) { btrfs_ref_tree_mod(fs_info, &generic_ref); ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL); BUG_ON(ret); /* -ENOMEM */ @@ -3303,7 +3303,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, struct btrfs_block_group *cache; bool must_pin = false; - if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { + if (root_id != BTRFS_TREE_LOG_OBJECTID) { ret = check_ref_cleanup(trans, buf->start); if (!ret) { btrfs_redirty_list_add(trans->transaction, buf); @@ -5441,7 +5441,8 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, goto owner_mismatch; } - btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), eb, parent, + wc->refs[level] == 1); out: wc->refs[level] = 0; wc->flags[level] = 0; diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index a33bca94d133ec..3abec44c62559d 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1256,8 +1256,8 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) btrfs_tree_lock(free_space_root->node); btrfs_clean_tree_block(free_space_root->node); btrfs_tree_unlock(free_space_root->node); - btrfs_free_tree_block(trans, free_space_root, free_space_root->node, - 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(free_space_root), + free_space_root->node, 0, 1); btrfs_put_root(free_space_root); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bf53af8694f8e8..7272d9d3fa78d7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -615,11 +615,12 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, * Since we don't abort the transaction in this case, free the * tree block so that we don't leak space and leave the * filesystem in an inconsistent state (an extent item in the - * extent tree without backreferences). Also no need to have - * the tree block locked since it is not in any tree at this - * point, so no other task can find it and use it. + * extent tree with a backreference for a root that does not + * exists). Also no need to have the tree block locked since it + * is not in any tree at this point, so no other task can find + * it and use it. */ - btrfs_free_tree_block(trans, root, leaf, 0, 1); + btrfs_free_tree_block(trans, objectid, leaf, 0, 1); free_extent_buffer(leaf); goto fail; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 2c803108ea9446..4ca809fa80eaf8 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1259,7 +1259,8 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) btrfs_tree_lock(quota_root->node); btrfs_clean_tree_block(quota_root->node); btrfs_tree_unlock(quota_root->node); - btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(quota_root), + quota_root->node, 0, 1); btrfs_put_root(quota_root); From 48f8f198a2ab8e89050aac4ebe6d1a167d73d956 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 13 Dec 2021 08:45:13 +0000 Subject: [PATCH 0287/1056] btrfs: fix warning when freeing leaf after subvolume creation failure [ Upstream commit 212a58fda9b9077e0efc20200a4feb76afacfd95 ] When creating a subvolume, at ioctl.c:create_subvol(), if we fail to insert the root item for the new subvolume into the root tree, we can trigger the following warning: [78961.741046] WARNING: CPU: 0 PID: 4079814 at fs/btrfs/extent-tree.c:3357 btrfs_free_tree_block+0x2af/0x310 [btrfs] [78961.743344] Modules linked in: [78961.749440] dm_snapshot dm_thin_pool (...) [78961.773648] CPU: 0 PID: 4079814 Comm: fsstress Not tainted 5.16.0-rc4-btrfs-next-108 #1 [78961.775198] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [78961.777266] RIP: 0010:btrfs_free_tree_block+0x2af/0x310 [btrfs] [78961.778398] Code: 17 00 48 85 (...) [78961.781067] RSP: 0018:ffffaa4001657b28 EFLAGS: 00010202 [78961.781877] RAX: 0000000000000213 RBX: ffff897f8a796910 RCX: 0000000000000000 [78961.782780] RDX: 0000000000000000 RSI: 0000000011004000 RDI: 00000000ffffffff [78961.783764] RBP: ffff8981f490e800 R08: 0000000000000001 R09: 0000000000000000 [78961.784740] R10: 0000000000000000 R11: 0000000000000001 R12: ffff897fc963fcc8 [78961.785665] R13: 0000000000000001 R14: ffff898063548000 R15: ffff898063548000 [78961.786620] FS: 00007f31283c6b80(0000) GS:ffff8982ace00000(0000) knlGS:0000000000000000 [78961.787717] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [78961.788598] CR2: 00007f31285c3000 CR3: 000000023fcc8003 CR4: 0000000000370ef0 [78961.789568] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [78961.790585] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [78961.791684] Call Trace: [78961.792082] [78961.792359] create_subvol+0x5d1/0x9a0 [btrfs] [78961.793054] btrfs_mksubvol+0x447/0x4c0 [btrfs] [78961.794009] ? preempt_count_add+0x49/0xa0 [78961.794705] __btrfs_ioctl_snap_create+0x123/0x190 [btrfs] [78961.795712] ? _copy_from_user+0x66/0xa0 [78961.796382] btrfs_ioctl_snap_create_v2+0xbb/0x140 [btrfs] [78961.797392] btrfs_ioctl+0xd1e/0x35c0 [btrfs] [78961.798172] ? __slab_free+0x10a/0x360 [78961.798820] ? rcu_read_lock_sched_held+0x12/0x60 [78961.799664] ? lock_release+0x223/0x4a0 [78961.800321] ? lock_acquired+0x19f/0x420 [78961.800992] ? rcu_read_lock_sched_held+0x12/0x60 [78961.801796] ? trace_hardirqs_on+0x1b/0xe0 [78961.802495] ? _raw_spin_unlock_irqrestore+0x3e/0x60 [78961.803358] ? kmem_cache_free+0x321/0x3c0 [78961.804071] ? __x64_sys_ioctl+0x83/0xb0 [78961.804711] __x64_sys_ioctl+0x83/0xb0 [78961.805348] do_syscall_64+0x3b/0xc0 [78961.805969] entry_SYSCALL_64_after_hwframe+0x44/0xae [78961.806830] RIP: 0033:0x7f31284bc957 [78961.807517] Code: 3c 1c 48 f7 d8 (...) This is because we are calling btrfs_free_tree_block() on an extent buffer that is dirty. Fix that by cleaning the extent buffer, with btrfs_clean_tree_block(), before freeing it. This was triggered by test case generic/475 from fstests. Fixes: 67addf29004c5b ("btrfs: fix metadata extent leak after failure to create subvolume") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/ioctl.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7272d9d3fa78d7..a37ab3e89a3bcc 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -616,10 +616,11 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, * tree block so that we don't leak space and leave the * filesystem in an inconsistent state (an extent item in the * extent tree with a backreference for a root that does not - * exists). Also no need to have the tree block locked since it - * is not in any tree at this point, so no other task can find - * it and use it. + * exists). */ + btrfs_tree_lock(leaf); + btrfs_clean_tree_block(leaf); + btrfs_tree_unlock(leaf); btrfs_free_tree_block(trans, objectid, leaf, 0, 1); free_extent_buffer(leaf); goto fail; From 8d1d6b29baa98135994a1d5d1b5d471677e04773 Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Sun, 5 Sep 2021 18:57:32 -0700 Subject: [PATCH 0288/1056] Input: cpcap-pwrbutton - handle errors from platform_get_irq() [ Upstream commit 58ae4004b9c4bb040958cf73986b687a5ea4d85d ] The function cpcap_power_button_probe() does not perform sufficient error checking after executing platform_get_irq(), thus fix it. Signed-off-by: Tang Bin Link: https://lore.kernel.org/r/20210802121740.8700-1-tangbin@cmss.chinamobile.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/misc/cpcap-pwrbutton.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/input/misc/cpcap-pwrbutton.c b/drivers/input/misc/cpcap-pwrbutton.c index 0abef63217e21e..372cb44d06357a 100644 --- a/drivers/input/misc/cpcap-pwrbutton.c +++ b/drivers/input/misc/cpcap-pwrbutton.c @@ -54,9 +54,13 @@ static irqreturn_t powerbutton_irq(int irq, void *_button) static int cpcap_power_button_probe(struct platform_device *pdev) { struct cpcap_power_button *button; - int irq = platform_get_irq(pdev, 0); + int irq; int err; + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + button = devm_kmalloc(&pdev->dev, sizeof(*button), GFP_KERNEL); if (!button) return -ENOMEM; From 1354ceb1b6bfcb0c51b099a2d7da65e8254a254a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Sep 2021 21:08:02 -0700 Subject: [PATCH 0289/1056] Input: goodix - change goodix_i2c_write() len parameter type to int [ Upstream commit 31ae0102a34ed863c7d32b10e768036324991679 ] Change the type of the goodix_i2c_write() len parameter to from 'unsigned' to 'int' to avoid bare use of 'unsigned', changing it to 'int' makes goodix_i2c_write()' prototype consistent with goodix_i2c_read(). Reviewed-by: Bastien Nocera Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210920150643.155872-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/goodix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 5051a1766aac62..1eb776abe5625e 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -246,7 +246,7 @@ static int goodix_i2c_read(struct i2c_client *client, * @len: length of the buffer to write */ static int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, - unsigned len) + int len) { u8 *addr_buf; struct i2c_msg msg; From f5b1c6d526d8525329e60a9c216f5ff8d011453e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Sep 2021 21:08:18 -0700 Subject: [PATCH 0290/1056] Input: goodix - add a goodix.h header file [ Upstream commit a2233cb7b65a017067e2f2703375ecc930a0ab30 ] Add a goodix.h header file, and move the register definitions, and struct declarations there and add prototypes for various helper functions. This is a preparation patch for adding support for controllers without flash, which need to have their firmware uploaded and need some other special handling too. Since MAINTAINERS needs updating because of this change anyways, also add myself as co-maintainer. Reviewed-by: Bastien Nocera Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210920150643.155872-3-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- MAINTAINERS | 3 +- drivers/input/touchscreen/goodix.c | 74 +++--------------------------- drivers/input/touchscreen/goodix.h | 73 +++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 69 deletions(-) create mode 100644 drivers/input/touchscreen/goodix.h diff --git a/MAINTAINERS b/MAINTAINERS index a60d7e0466afa3..edc32575828b59 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7952,9 +7952,10 @@ F: drivers/media/usb/go7007/ GOODIX TOUCHSCREEN M: Bastien Nocera +M: Hans de Goede L: linux-input@vger.kernel.org S: Maintained -F: drivers/input/touchscreen/goodix.c +F: drivers/input/touchscreen/goodix* GOOGLE ETHERNET DRIVERS M: Jeroen de Borst diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 1eb776abe5625e..5ccdd6abd86835 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -14,20 +14,15 @@ #include #include #include -#include -#include -#include -#include -#include #include #include #include #include -#include #include #include #include #include +#include "goodix.h" #define GOODIX_GPIO_INT_NAME "irq" #define GOODIX_GPIO_RST_NAME "reset" @@ -38,22 +33,11 @@ #define GOODIX_CONTACT_SIZE 8 #define GOODIX_MAX_CONTACT_SIZE 9 #define GOODIX_MAX_CONTACTS 10 -#define GOODIX_MAX_KEYS 7 #define GOODIX_CONFIG_MIN_LENGTH 186 #define GOODIX_CONFIG_911_LENGTH 186 #define GOODIX_CONFIG_967_LENGTH 228 #define GOODIX_CONFIG_GT9X_LENGTH 240 -#define GOODIX_CONFIG_MAX_LENGTH 240 - -/* Register defines */ -#define GOODIX_REG_COMMAND 0x8040 -#define GOODIX_CMD_SCREEN_OFF 0x05 - -#define GOODIX_READ_COOR_ADDR 0x814E -#define GOODIX_GT1X_REG_CONFIG_DATA 0x8050 -#define GOODIX_GT9X_REG_CONFIG_DATA 0x8047 -#define GOODIX_REG_ID 0x8140 #define GOODIX_BUFFER_STATUS_READY BIT(7) #define GOODIX_HAVE_KEY BIT(4) @@ -68,55 +52,11 @@ #define ACPI_GPIO_SUPPORT #endif -struct goodix_ts_data; - -enum goodix_irq_pin_access_method { - IRQ_PIN_ACCESS_NONE, - IRQ_PIN_ACCESS_GPIO, - IRQ_PIN_ACCESS_ACPI_GPIO, - IRQ_PIN_ACCESS_ACPI_METHOD, -}; - -struct goodix_chip_data { - u16 config_addr; - int config_len; - int (*check_config)(struct goodix_ts_data *ts, const u8 *cfg, int len); - void (*calc_config_checksum)(struct goodix_ts_data *ts); -}; - struct goodix_chip_id { const char *id; const struct goodix_chip_data *data; }; -#define GOODIX_ID_MAX_LEN 4 - -struct goodix_ts_data { - struct i2c_client *client; - struct input_dev *input_dev; - const struct goodix_chip_data *chip; - struct touchscreen_properties prop; - unsigned int max_touch_num; - unsigned int int_trigger_type; - struct regulator *avdd28; - struct regulator *vddio; - struct gpio_desc *gpiod_int; - struct gpio_desc *gpiod_rst; - int gpio_count; - int gpio_int_idx; - char id[GOODIX_ID_MAX_LEN + 1]; - u16 version; - const char *cfg_name; - bool reset_controller_at_probe; - bool load_cfg_from_disk; - struct completion firmware_loading_complete; - unsigned long irq_flags; - enum goodix_irq_pin_access_method irq_pin_access_method; - unsigned int contact_size; - u8 config[GOODIX_CONFIG_MAX_LENGTH]; - unsigned short keymap[GOODIX_MAX_KEYS]; -}; - static int goodix_check_cfg_8(struct goodix_ts_data *ts, const u8 *cfg, int len); static int goodix_check_cfg_16(struct goodix_ts_data *ts, @@ -216,8 +156,7 @@ static const struct dmi_system_id inverted_x_screen[] = { * @buf: raw write data buffer. * @len: length of the buffer to write */ -static int goodix_i2c_read(struct i2c_client *client, - u16 reg, u8 *buf, int len) +int goodix_i2c_read(struct i2c_client *client, u16 reg, u8 *buf, int len) { struct i2c_msg msgs[2]; __be16 wbuf = cpu_to_be16(reg); @@ -245,8 +184,7 @@ static int goodix_i2c_read(struct i2c_client *client, * @buf: raw data buffer to write. * @len: length of the buffer to write */ -static int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, - int len) +int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, int len) { u8 *addr_buf; struct i2c_msg msg; @@ -270,7 +208,7 @@ static int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, return ret < 0 ? ret : (ret != 1 ? -EIO : 0); } -static int goodix_i2c_write_u8(struct i2c_client *client, u16 reg, u8 value) +int goodix_i2c_write_u8(struct i2c_client *client, u16 reg, u8 value) { return goodix_i2c_write(client, reg, &value, sizeof(value)); } @@ -554,7 +492,7 @@ static int goodix_check_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len) * @cfg: config firmware to write to device * @len: config data length */ -static int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len) +int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len) { int error; @@ -652,7 +590,7 @@ static int goodix_irq_direction_input(struct goodix_ts_data *ts) return -EINVAL; /* Never reached */ } -static int goodix_int_sync(struct goodix_ts_data *ts) +int goodix_int_sync(struct goodix_ts_data *ts) { int error; diff --git a/drivers/input/touchscreen/goodix.h b/drivers/input/touchscreen/goodix.h new file mode 100644 index 00000000000000..cdaced4f2980ad --- /dev/null +++ b/drivers/input/touchscreen/goodix.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __GOODIX_H__ +#define __GOODIX_H__ + +#include +#include +#include +#include +#include +#include + +/* Register defines */ +#define GOODIX_REG_COMMAND 0x8040 +#define GOODIX_CMD_SCREEN_OFF 0x05 + +#define GOODIX_GT1X_REG_CONFIG_DATA 0x8050 +#define GOODIX_GT9X_REG_CONFIG_DATA 0x8047 +#define GOODIX_REG_ID 0x8140 +#define GOODIX_READ_COOR_ADDR 0x814E + +#define GOODIX_ID_MAX_LEN 4 +#define GOODIX_CONFIG_MAX_LENGTH 240 +#define GOODIX_MAX_KEYS 7 + +enum goodix_irq_pin_access_method { + IRQ_PIN_ACCESS_NONE, + IRQ_PIN_ACCESS_GPIO, + IRQ_PIN_ACCESS_ACPI_GPIO, + IRQ_PIN_ACCESS_ACPI_METHOD, +}; + +struct goodix_ts_data; + +struct goodix_chip_data { + u16 config_addr; + int config_len; + int (*check_config)(struct goodix_ts_data *ts, const u8 *cfg, int len); + void (*calc_config_checksum)(struct goodix_ts_data *ts); +}; + +struct goodix_ts_data { + struct i2c_client *client; + struct input_dev *input_dev; + const struct goodix_chip_data *chip; + struct touchscreen_properties prop; + unsigned int max_touch_num; + unsigned int int_trigger_type; + struct regulator *avdd28; + struct regulator *vddio; + struct gpio_desc *gpiod_int; + struct gpio_desc *gpiod_rst; + int gpio_count; + int gpio_int_idx; + char id[GOODIX_ID_MAX_LEN + 1]; + u16 version; + const char *cfg_name; + bool reset_controller_at_probe; + bool load_cfg_from_disk; + struct completion firmware_loading_complete; + unsigned long irq_flags; + enum goodix_irq_pin_access_method irq_pin_access_method; + unsigned int contact_size; + u8 config[GOODIX_CONFIG_MAX_LENGTH]; + unsigned short keymap[GOODIX_MAX_KEYS]; +}; + +int goodix_i2c_read(struct i2c_client *client, u16 reg, u8 *buf, int len); +int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, int len); +int goodix_i2c_write_u8(struct i2c_client *client, u16 reg, u8 value); +int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len); +int goodix_int_sync(struct goodix_ts_data *ts); + +#endif From 8422a9b306f10ecc308ee0f3f209dbd6be9f29bd Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Sep 2021 21:08:26 -0700 Subject: [PATCH 0291/1056] Input: goodix - refactor reset handling [ Upstream commit 209bda4741f68f102cf2f272227bfc938e387b51 ] Refactor reset handling a bit, change the main reset handler into a new goodix_reset_no_int_sync() helper and add a goodix_reset() wrapper which calls goodix_int_sync() separately. Also push the dev_err() call on reset failure into the goodix_reset_no_int_sync() and goodix_int_sync() functions, so that we don't need to have separate dev_err() calls in all their callers. This is a preparation patch for adding support for controllers without flash, which need to have their firmware uploaded and need some other special handling too. Reviewed-by: Bastien Nocera Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210920150643.155872-4-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/goodix.c | 48 ++++++++++++++++++++---------- drivers/input/touchscreen/goodix.h | 1 + 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 5ccdd6abd86835..2ca903a8af2142 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -596,56 +596,76 @@ int goodix_int_sync(struct goodix_ts_data *ts) error = goodix_irq_direction_output(ts, 0); if (error) - return error; + goto error; msleep(50); /* T5: 50ms */ error = goodix_irq_direction_input(ts); if (error) - return error; + goto error; return 0; + +error: + dev_err(&ts->client->dev, "Controller irq sync failed.\n"); + return error; } /** - * goodix_reset - Reset device during power on + * goodix_reset_no_int_sync - Reset device, leaving interrupt line in output mode * * @ts: goodix_ts_data pointer */ -static int goodix_reset(struct goodix_ts_data *ts) +int goodix_reset_no_int_sync(struct goodix_ts_data *ts) { int error; /* begin select I2C slave addr */ error = gpiod_direction_output(ts->gpiod_rst, 0); if (error) - return error; + goto error; msleep(20); /* T2: > 10ms */ /* HIGH: 0x28/0x29, LOW: 0xBA/0xBB */ error = goodix_irq_direction_output(ts, ts->client->addr == 0x14); if (error) - return error; + goto error; usleep_range(100, 2000); /* T3: > 100us */ error = gpiod_direction_output(ts->gpiod_rst, 1); if (error) - return error; + goto error; usleep_range(6000, 10000); /* T4: > 5ms */ /* end select I2C slave addr */ error = gpiod_direction_input(ts->gpiod_rst); if (error) - return error; + goto error; - error = goodix_int_sync(ts); + return 0; + +error: + dev_err(&ts->client->dev, "Controller reset failed.\n"); + return error; +} + +/** + * goodix_reset - Reset device during power on + * + * @ts: goodix_ts_data pointer + */ +static int goodix_reset(struct goodix_ts_data *ts) +{ + int error; + + error = goodix_reset_no_int_sync(ts); if (error) return error; - return 0; + return goodix_int_sync(ts); } #ifdef ACPI_GPIO_SUPPORT @@ -1144,10 +1164,8 @@ static int goodix_ts_probe(struct i2c_client *client, if (ts->reset_controller_at_probe) { /* reset the controller */ error = goodix_reset(ts); - if (error) { - dev_err(&client->dev, "Controller reset failed.\n"); + if (error) return error; - } } error = goodix_i2c_test(client); @@ -1289,10 +1307,8 @@ static int __maybe_unused goodix_resume(struct device *dev) if (error != 0 || config_ver != ts->config[0]) { error = goodix_reset(ts); - if (error) { - dev_err(dev, "Controller reset failed.\n"); + if (error) return error; - } error = goodix_send_cfg(ts, ts->config, ts->chip->config_len); if (error) diff --git a/drivers/input/touchscreen/goodix.h b/drivers/input/touchscreen/goodix.h index cdaced4f2980ad..0b88554ba2aee1 100644 --- a/drivers/input/touchscreen/goodix.h +++ b/drivers/input/touchscreen/goodix.h @@ -69,5 +69,6 @@ int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, int len) int goodix_i2c_write_u8(struct i2c_client *client, u16 reg, u8 value); int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len); int goodix_int_sync(struct goodix_ts_data *ts); +int goodix_reset_no_int_sync(struct goodix_ts_data *ts); #endif From 140395211626142f64b1e4269e01c26641307d86 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 6 Dec 2021 23:15:09 -0800 Subject: [PATCH 0292/1056] Input: goodix - try not to touch the reset-pin on x86/ACPI devices [ Upstream commit a2fd46cd3dbb83b373ba74f4043f8dae869c65f1 ] Unless the controller is not responding at boot or after suspend/resume, the driver never resets the controller on x86/ACPI platforms. The driver still requesting the reset pin at probe() though in case it needs it. Until now the driver has always requested the reset pin with GPIOD_IN as type. The idea being to put the pin in high-impedance mode to save power until the driver actually wants to issue a reset. But this means that just requesting the pin can cause issues, since requesting it in another mode then GPIOD_ASIS may cause the pinctrl driver to touch the pin settings. We have already had issues before due to a bug in the pinctrl-cherryview.c driver which has been fixed in commit 921daeeca91b ("pinctrl: cherryview: Preserve CHV_PADCTRL1_INVRXTX_TXDATA flag on GPIOs"). And now it turns out that requesting the reset-pin as GPIOD_IN also stops the touchscreen from working on the GPD P2 max mini-laptop. The behavior of putting the pin in high-impedance mode relies on there being some external pull-up to keep it high and there seems to be no pull-up on the GPD P2 max, causing things to break. This commit fixes this by requesting the reset pin as is when using the x86/ACPI code paths to lookup the GPIOs; and by not dropping it back into input-mode in case the driver does end up issuing a reset for error-recovery. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=209061 Fixes: a7d4b171660c ("Input: goodix - add support for getting IRQ + reset GPIOs on Cherry Trail devices") Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211206091116.44466-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/goodix.c | 30 +++++++++++++++++++++++++----- drivers/input/touchscreen/goodix.h | 1 + 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 2ca903a8af2142..3667f7e51fde42 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -640,10 +640,16 @@ int goodix_reset_no_int_sync(struct goodix_ts_data *ts) usleep_range(6000, 10000); /* T4: > 5ms */ - /* end select I2C slave addr */ - error = gpiod_direction_input(ts->gpiod_rst); - if (error) - goto error; + /* + * Put the reset pin back in to input / high-impedance mode to save + * power. Only do this in the non ACPI case since some ACPI boards + * don't have a pull-up, so there the reset pin must stay active-high. + */ + if (ts->irq_pin_access_method == IRQ_PIN_ACCESS_GPIO) { + error = gpiod_direction_input(ts->gpiod_rst); + if (error) + goto error; + } return 0; @@ -777,6 +783,14 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts) return -EINVAL; } + /* + * Normally we put the reset pin in input / high-impedance mode to save + * power. But some x86/ACPI boards don't have a pull-up, so for the ACPI + * case, leave the pin as is. This results in the pin not being touched + * at all on x86/ACPI boards, except when needed for error-recover. + */ + ts->gpiod_rst_flags = GPIOD_ASIS; + return devm_acpi_dev_add_driver_gpios(dev, gpio_mapping); } #else @@ -802,6 +816,12 @@ static int goodix_get_gpio_config(struct goodix_ts_data *ts) return -EINVAL; dev = &ts->client->dev; + /* + * By default we request the reset pin as input, leaving it in + * high-impedance when not resetting the controller to save power. + */ + ts->gpiod_rst_flags = GPIOD_IN; + ts->avdd28 = devm_regulator_get(dev, "AVDD28"); if (IS_ERR(ts->avdd28)) { error = PTR_ERR(ts->avdd28); @@ -839,7 +859,7 @@ static int goodix_get_gpio_config(struct goodix_ts_data *ts) ts->gpiod_int = gpiod; /* Get the reset line GPIO pin number */ - gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_RST_NAME, GPIOD_IN); + gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_RST_NAME, ts->gpiod_rst_flags); if (IS_ERR(gpiod)) { error = PTR_ERR(gpiod); if (error != -EPROBE_DEFER) diff --git a/drivers/input/touchscreen/goodix.h b/drivers/input/touchscreen/goodix.h index 0b88554ba2aee1..1a1571ad2cd23c 100644 --- a/drivers/input/touchscreen/goodix.h +++ b/drivers/input/touchscreen/goodix.h @@ -51,6 +51,7 @@ struct goodix_ts_data { struct gpio_desc *gpiod_rst; int gpio_count; int gpio_int_idx; + enum gpiod_flags gpiod_rst_flags; char id[GOODIX_ID_MAX_LEN + 1]; u16 version; const char *cfg_name; From 3fb11d13220da1381e38ec90f9ad4101cfa1d578 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 23 Jul 2021 09:58:57 +0200 Subject: [PATCH 0293/1056] dma-buf/poll: Get a file reference for outstanding fence callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ff2d23843f7fb4f13055be5a4a9a20ddd04e6e9c ] This makes sure we don't hit the BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active); in dma_buf_release, which could be triggered by user space closing the dma-buf file description while there are outstanding fence callbacks from dma_buf_poll. Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210723075857.4065-1-michel@daenzer.net Signed-off-by: Christian König Signed-off-by: Sasha Levin --- drivers/dma-buf/dma-buf.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index f9217e300eea9c..968c3df2810e65 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -67,12 +67,9 @@ static void dma_buf_release(struct dentry *dentry) BUG_ON(dmabuf->vmapping_counter); /* - * Any fences that a dma-buf poll can wait on should be signaled - * before releasing dma-buf. This is the responsibility of each - * driver that uses the reservation objects. - * - * If you hit this BUG() it means someone dropped their ref to the - * dma-buf while still having pending operation to the buffer. + * If you hit this BUG() it could mean: + * * There's a file reference imbalance in dma_buf_poll / dma_buf_poll_cb or somewhere else + * * dmabuf->cb_in/out.active are non-0 despite no pending fence callback */ BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active); @@ -200,6 +197,7 @@ static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence) static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { struct dma_buf_poll_cb_t *dcb = (struct dma_buf_poll_cb_t *)cb; + struct dma_buf *dmabuf = container_of(dcb->poll, struct dma_buf, poll); unsigned long flags; spin_lock_irqsave(&dcb->poll->lock, flags); @@ -207,6 +205,8 @@ static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb) dcb->active = 0; spin_unlock_irqrestore(&dcb->poll->lock, flags); dma_fence_put(fence); + /* Paired with get_file in dma_buf_poll */ + fput(dmabuf->file); } static bool dma_buf_poll_shared(struct dma_resv *resv, @@ -282,8 +282,12 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) spin_unlock_irq(&dmabuf->poll.lock); if (events & EPOLLOUT) { + /* Paired with fput in dma_buf_poll_cb */ + get_file(dmabuf->file); + if (!dma_buf_poll_shared(resv, dcb) && !dma_buf_poll_excl(resv, dcb)) + /* No callback queued, wake up any other waiters */ dma_buf_poll_cb(NULL, &dcb->cb); else @@ -303,6 +307,9 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) spin_unlock_irq(&dmabuf->poll.lock); if (events & EPOLLIN) { + /* Paired with fput in dma_buf_poll_cb */ + get_file(dmabuf->file); + if (!dma_buf_poll_excl(resv, dcb)) /* No callback queued, wake up any other waiters */ dma_buf_poll_cb(NULL, &dcb->cb); From 70fc07e30817c8611b9f841510f04e53cd8659cd Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 13 Oct 2021 10:12:49 +0100 Subject: [PATCH 0294/1056] btrfs: fix deadlock between chunk allocation and chunk btree modifications [ Upstream commit 2bb2e00ed9787e52580bb651264b8d6a2b7a9dd2 ] When a task is doing some modification to the chunk btree and it is not in the context of a chunk allocation or a chunk removal, it can deadlock with another task that is currently allocating a new data or metadata chunk. These contexts are the following: * When relocating a system chunk, when we need to COW the extent buffers that belong to the chunk btree; * When adding a new device (ioctl), where we need to add a new device item to the chunk btree; * When removing a device (ioctl), where we need to remove a device item from the chunk btree; * When resizing a device (ioctl), where we need to update a device item in the chunk btree and may need to relocate a system chunk that lies beyond the new device size when shrinking a device. The problem happens due to a sequence of steps like the following: 1) Task A starts a data or metadata chunk allocation and it locks the chunk mutex; 2) Task B is relocating a system chunk, and when it needs to COW an extent buffer of the chunk btree, it has locked both that extent buffer as well as its parent extent buffer; 3) Since there is not enough available system space, either because none of the existing system block groups have enough free space or because the only one with enough free space is in RO mode due to the relocation, task B triggers a new system chunk allocation. It blocks when trying to acquire the chunk mutex, currently held by task A; 4) Task A enters btrfs_chunk_alloc_add_chunk_item(), in order to insert the new chunk item into the chunk btree and update the existing device items there. But in order to do that, it has to lock the extent buffer that task B locked at step 2, or its parent extent buffer, but task B is waiting on the chunk mutex, which is currently locked by task A, therefore resulting in a deadlock. One example report when the deadlock happens with system chunk relocation: INFO: task kworker/u9:5:546 blocked for more than 143 seconds. Not tainted 5.15.0-rc3+ #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/u9:5 state:D stack:25936 pid: 546 ppid: 2 flags:0x00004000 Workqueue: events_unbound btrfs_async_reclaim_metadata_space Call Trace: context_switch kernel/sched/core.c:4940 [inline] __schedule+0xcd9/0x2530 kernel/sched/core.c:6287 schedule+0xd3/0x270 kernel/sched/core.c:6366 rwsem_down_read_slowpath+0x4ee/0x9d0 kernel/locking/rwsem.c:993 __down_read_common kernel/locking/rwsem.c:1214 [inline] __down_read kernel/locking/rwsem.c:1223 [inline] down_read_nested+0xe6/0x440 kernel/locking/rwsem.c:1590 __btrfs_tree_read_lock+0x31/0x350 fs/btrfs/locking.c:47 btrfs_tree_read_lock fs/btrfs/locking.c:54 [inline] btrfs_read_lock_root_node+0x8a/0x320 fs/btrfs/locking.c:191 btrfs_search_slot_get_root fs/btrfs/ctree.c:1623 [inline] btrfs_search_slot+0x13b4/0x2140 fs/btrfs/ctree.c:1728 btrfs_update_device+0x11f/0x500 fs/btrfs/volumes.c:2794 btrfs_chunk_alloc_add_chunk_item+0x34d/0xea0 fs/btrfs/volumes.c:5504 do_chunk_alloc fs/btrfs/block-group.c:3408 [inline] btrfs_chunk_alloc+0x84d/0xf50 fs/btrfs/block-group.c:3653 flush_space+0x54e/0xd80 fs/btrfs/space-info.c:670 btrfs_async_reclaim_metadata_space+0x396/0xa90 fs/btrfs/space-info.c:953 process_one_work+0x9df/0x16d0 kernel/workqueue.c:2297 worker_thread+0x90/0xed0 kernel/workqueue.c:2444 kthread+0x3e5/0x4d0 kernel/kthread.c:319 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 INFO: task syz-executor:9107 blocked for more than 143 seconds. Not tainted 5.15.0-rc3+ #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:syz-executor state:D stack:23200 pid: 9107 ppid: 7792 flags:0x00004004 Call Trace: context_switch kernel/sched/core.c:4940 [inline] __schedule+0xcd9/0x2530 kernel/sched/core.c:6287 schedule+0xd3/0x270 kernel/sched/core.c:6366 schedule_preempt_disabled+0xf/0x20 kernel/sched/core.c:6425 __mutex_lock_common kernel/locking/mutex.c:669 [inline] __mutex_lock+0xc96/0x1680 kernel/locking/mutex.c:729 btrfs_chunk_alloc+0x31a/0xf50 fs/btrfs/block-group.c:3631 find_free_extent_update_loop fs/btrfs/extent-tree.c:3986 [inline] find_free_extent+0x25cb/0x3a30 fs/btrfs/extent-tree.c:4335 btrfs_reserve_extent+0x1f1/0x500 fs/btrfs/extent-tree.c:4415 btrfs_alloc_tree_block+0x203/0x1120 fs/btrfs/extent-tree.c:4813 __btrfs_cow_block+0x412/0x1620 fs/btrfs/ctree.c:415 btrfs_cow_block+0x2f6/0x8c0 fs/btrfs/ctree.c:570 btrfs_search_slot+0x1094/0x2140 fs/btrfs/ctree.c:1768 relocate_tree_block fs/btrfs/relocation.c:2694 [inline] relocate_tree_blocks+0xf73/0x1770 fs/btrfs/relocation.c:2757 relocate_block_group+0x47e/0xc70 fs/btrfs/relocation.c:3673 btrfs_relocate_block_group+0x48a/0xc60 fs/btrfs/relocation.c:4070 btrfs_relocate_chunk+0x96/0x280 fs/btrfs/volumes.c:3181 __btrfs_balance fs/btrfs/volumes.c:3911 [inline] btrfs_balance+0x1f03/0x3cd0 fs/btrfs/volumes.c:4301 btrfs_ioctl_balance+0x61e/0x800 fs/btrfs/ioctl.c:4137 btrfs_ioctl+0x39ea/0x7b70 fs/btrfs/ioctl.c:4949 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] __se_sys_ioctl fs/ioctl.c:860 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae So fix this by making sure that whenever we try to modify the chunk btree and we are neither in a chunk allocation context nor in a chunk remove context, we reserve system space before modifying the chunk btree. Reported-by: Hao Sun Link: https://lore.kernel.org/linux-btrfs/CACkBjsax51i4mu6C0C3vJqQN3NR_iVuucoeG3U1HXjrgzn5FFQ@mail.gmail.com/ Fixes: 79bd37120b1495 ("btrfs: rework chunk allocation to avoid exhaustion of the system chunk array") CC: stable@vger.kernel.org # 5.14+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/block-group.c | 146 +++++++++++++++++++++++++---------------- fs/btrfs/block-group.h | 2 + fs/btrfs/relocation.c | 4 ++ fs/btrfs/volumes.c | 15 ++++- 4 files changed, 111 insertions(+), 56 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index aadc1203ad885e..c6c5a22ff6e86a 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3406,25 +3406,6 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) goto out; } - /* - * If this is a system chunk allocation then stop right here and do not - * add the chunk item to the chunk btree. This is to prevent a deadlock - * because this system chunk allocation can be triggered while COWing - * some extent buffer of the chunk btree and while holding a lock on a - * parent extent buffer, in which case attempting to insert the chunk - * item (or update the device item) would result in a deadlock on that - * parent extent buffer. In this case defer the chunk btree updates to - * the second phase of chunk allocation and keep our reservation until - * the second phase completes. - * - * This is a rare case and can only be triggered by the very few cases - * we have where we need to touch the chunk btree outside chunk allocation - * and chunk removal. These cases are basically adding a device, removing - * a device or resizing a device. - */ - if (flags & BTRFS_BLOCK_GROUP_SYSTEM) - return 0; - ret = btrfs_chunk_alloc_add_chunk_item(trans, bg); /* * Normally we are not expected to fail with -ENOSPC here, since we have @@ -3557,14 +3538,14 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) * This has happened before and commit eafa4fd0ad0607 ("btrfs: fix exhaustion of * the system chunk array due to concurrent allocations") provides more details. * - * For allocation of system chunks, we defer the updates and insertions into the - * chunk btree to phase 2. This is to prevent deadlocks on extent buffers because - * if the chunk allocation is triggered while COWing an extent buffer of the - * chunk btree, we are holding a lock on the parent of that extent buffer and - * doing the chunk btree updates and insertions can require locking that parent. - * This is for the very few and rare cases where we update the chunk btree that - * are not chunk allocation or chunk removal: adding a device, removing a device - * or resizing a device. + * Allocation of system chunks does not happen through this function. A task that + * needs to update the chunk btree (the only btree that uses system chunks), must + * preallocate chunk space by calling either check_system_chunk() or + * btrfs_reserve_chunk_metadata() - the former is used when allocating a data or + * metadata chunk or when removing a chunk, while the later is used before doing + * a modification to the chunk btree - use cases for the later are adding, + * removing and resizing a device as well as relocation of a system chunk. + * See the comment below for more details. * * The reservation of system space, done through check_system_chunk(), as well * as all the updates and insertions into the chunk btree must be done while @@ -3601,11 +3582,27 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, if (trans->allocating_chunk) return -ENOSPC; /* - * If we are removing a chunk, don't re-enter or we would deadlock. - * System space reservation and system chunk allocation is done by the - * chunk remove operation (btrfs_remove_chunk()). + * Allocation of system chunks can not happen through this path, as we + * could end up in a deadlock if we are allocating a data or metadata + * chunk and there is another task modifying the chunk btree. + * + * This is because while we are holding the chunk mutex, we will attempt + * to add the new chunk item to the chunk btree or update an existing + * device item in the chunk btree, while the other task that is modifying + * the chunk btree is attempting to COW an extent buffer while holding a + * lock on it and on its parent - if the COW operation triggers a system + * chunk allocation, then we can deadlock because we are holding the + * chunk mutex and we may need to access that extent buffer or its parent + * in order to add the chunk item or update a device item. + * + * Tasks that want to modify the chunk tree should reserve system space + * before updating the chunk btree, by calling either + * btrfs_reserve_chunk_metadata() or check_system_chunk(). + * It's possible that after a task reserves the space, it still ends up + * here - this happens in the cases described above at do_chunk_alloc(). + * The task will have to either retry or fail. */ - if (trans->removing_chunk) + if (flags & BTRFS_BLOCK_GROUP_SYSTEM) return -ENOSPC; space_info = btrfs_find_space_info(fs_info, flags); @@ -3704,17 +3701,14 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type) return num_dev; } -/* - * Reserve space in the system space for allocating or removing a chunk - */ -void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) +static void reserve_chunk_space(struct btrfs_trans_handle *trans, + u64 bytes, + u64 type) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_space_info *info; u64 left; - u64 thresh; int ret = 0; - u64 num_devs; /* * Needed because we can end up allocating a system chunk and for an @@ -3727,19 +3721,13 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) left = info->total_bytes - btrfs_space_info_used(info, true); spin_unlock(&info->lock); - num_devs = get_profile_num_devs(fs_info, type); - - /* num_devs device items to update and 1 chunk item to add or remove */ - thresh = btrfs_calc_metadata_size(fs_info, num_devs) + - btrfs_calc_insert_metadata_size(fs_info, 1); - - if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { + if (left < bytes && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu", - left, thresh, type); + left, bytes, type); btrfs_dump_space_info(fs_info, info, 0, 0); } - if (left < thresh) { + if (left < bytes) { u64 flags = btrfs_system_alloc_profile(fs_info); struct btrfs_block_group *bg; @@ -3748,21 +3736,20 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) * needing it, as we might not need to COW all nodes/leafs from * the paths we visit in the chunk tree (they were already COWed * or created in the current transaction for example). - * - * Also, if our caller is allocating a system chunk, do not - * attempt to insert the chunk item in the chunk btree, as we - * could deadlock on an extent buffer since our caller may be - * COWing an extent buffer from the chunk btree. */ bg = btrfs_create_chunk(trans, flags); if (IS_ERR(bg)) { ret = PTR_ERR(bg); - } else if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) { + } else { /* * If we fail to add the chunk item here, we end up * trying again at phase 2 of chunk allocation, at * btrfs_create_pending_block_groups(). So ignore - * any error here. + * any error here. An ENOSPC here could happen, due to + * the cases described at do_chunk_alloc() - the system + * block group we just created was just turned into RO + * mode by a scrub for example, or a running discard + * temporarily removed its free space entries, etc. */ btrfs_chunk_alloc_add_chunk_item(trans, bg); } @@ -3771,12 +3758,61 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) if (!ret) { ret = btrfs_block_rsv_add(fs_info->chunk_root, &fs_info->chunk_block_rsv, - thresh, BTRFS_RESERVE_NO_FLUSH); + bytes, BTRFS_RESERVE_NO_FLUSH); if (!ret) - trans->chunk_bytes_reserved += thresh; + trans->chunk_bytes_reserved += bytes; } } +/* + * Reserve space in the system space for allocating or removing a chunk. + * The caller must be holding fs_info->chunk_mutex. + */ +void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + const u64 num_devs = get_profile_num_devs(fs_info, type); + u64 bytes; + + /* num_devs device items to update and 1 chunk item to add or remove. */ + bytes = btrfs_calc_metadata_size(fs_info, num_devs) + + btrfs_calc_insert_metadata_size(fs_info, 1); + + reserve_chunk_space(trans, bytes, type); +} + +/* + * Reserve space in the system space, if needed, for doing a modification to the + * chunk btree. + * + * @trans: A transaction handle. + * @is_item_insertion: Indicate if the modification is for inserting a new item + * in the chunk btree or if it's for the deletion or update + * of an existing item. + * + * This is used in a context where we need to update the chunk btree outside + * block group allocation and removal, to avoid a deadlock with a concurrent + * task that is allocating a metadata or data block group and therefore needs to + * update the chunk btree while holding the chunk mutex. After the update to the + * chunk btree is done, btrfs_trans_release_chunk_metadata() should be called. + * + */ +void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans, + bool is_item_insertion) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + u64 bytes; + + if (is_item_insertion) + bytes = btrfs_calc_insert_metadata_size(fs_info, 1); + else + bytes = btrfs_calc_metadata_size(fs_info, 1); + + mutex_lock(&fs_info->chunk_mutex); + reserve_chunk_space(trans, bytes, BTRFS_BLOCK_GROUP_SYSTEM); + mutex_unlock(&fs_info->chunk_mutex); +} + void btrfs_put_block_group_cache(struct btrfs_fs_info *info) { struct btrfs_block_group *block_group; diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index c72a71efcb1872..37e55ebde735bc 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -289,6 +289,8 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, enum btrfs_chunk_alloc_enum force); int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type); void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type); +void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans, + bool is_item_insertion); u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags); void btrfs_put_block_group_cache(struct btrfs_fs_info *info); int btrfs_free_block_groups(struct btrfs_fs_info *info); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 0300770c0a89d8..429a198f8937d4 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2698,8 +2698,12 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, list_add_tail(&node->list, &rc->backref_cache.changed); } else { path->lowest_level = node->level; + if (root == root->fs_info->chunk_root) + btrfs_reserve_chunk_metadata(trans, false); ret = btrfs_search_slot(trans, root, key, path, 0, 1); btrfs_release_path(path); + if (root == root->fs_info->chunk_root) + btrfs_trans_release_chunk_metadata(trans); if (ret > 0) ret = 0; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b75ce79a2540cb..fa68efd7e61063 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1879,8 +1879,10 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans, key.type = BTRFS_DEV_ITEM_KEY; key.offset = device->devid; + btrfs_reserve_chunk_metadata(trans, true); ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path, &key, sizeof(*dev_item)); + btrfs_trans_release_chunk_metadata(trans); if (ret) goto out; @@ -1957,7 +1959,9 @@ static int btrfs_rm_dev_item(struct btrfs_device *device) key.type = BTRFS_DEV_ITEM_KEY; key.offset = device->devid; + btrfs_reserve_chunk_metadata(trans, false); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + btrfs_trans_release_chunk_metadata(trans); if (ret) { if (ret > 0) ret = -ENOENT; @@ -2513,7 +2517,9 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) key.type = BTRFS_DEV_ITEM_KEY; while (1) { + btrfs_reserve_chunk_metadata(trans, false); ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + btrfs_trans_release_chunk_metadata(trans); if (ret < 0) goto error; @@ -2861,6 +2867,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_super_block *super_copy = fs_info->super_copy; u64 old_total; u64 diff; + int ret; if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) return -EACCES; @@ -2889,7 +2896,11 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, &trans->transaction->dev_update_list); mutex_unlock(&fs_info->chunk_mutex); - return btrfs_update_device(trans, device); + btrfs_reserve_chunk_metadata(trans, false); + ret = btrfs_update_device(trans, device); + btrfs_trans_release_chunk_metadata(trans); + + return ret; } static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) @@ -4926,8 +4937,10 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) round_down(old_total - diff, fs_info->sectorsize)); mutex_unlock(&fs_info->chunk_mutex); + btrfs_reserve_chunk_metadata(trans, false); /* Now btrfs_update_device() will change the on-disk size. */ ret = btrfs_update_device(trans, device); + btrfs_trans_release_chunk_metadata(trans); if (ret < 0) { btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); From d839d15b50743164d7ad95f436ea284a2946c179 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 28 Jul 2021 12:21:00 -0700 Subject: [PATCH 0295/1056] drm/i915: Disable bonding on gen12+ platforms [ Upstream commit ce7e75c7ef1bf8ea3d947da8c674d2f40fd7d734 ] Disable bonding on gen12+ platforms aside from ones already supported by the i915 - TGL, RKL, and ADL-S. Signed-off-by: Matthew Brost Reviewed-by: John Harrison Acked-by: Daniel Vetter Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210728192100.132425-1-matthew.brost@intel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index ee0c0b712522ff..ba2e037a82e4ef 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -442,6 +442,13 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data) u16 idx, num_bonds; int err, n; + if (GRAPHICS_VER(i915) >= 12 && !IS_TIGERLAKE(i915) && + !IS_ROCKETLAKE(i915) && !IS_ALDERLAKE_S(i915)) { + drm_dbg(&i915->drm, + "Bonding on gen12+ aside from TGL, RKL, and ADL_S not supported\n"); + return -ENODEV; + } + if (get_user(idx, &ext->virtual_index)) return -EFAULT; From 9cf3a1c1288e43af00d70a8520ea9efbea01615e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 22 Sep 2021 08:25:23 +0200 Subject: [PATCH 0296/1056] drm/i915/gt: Register the migrate contexts with their engines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3e42cc61275f95fd7f022b6380b95428efe134d3 ] Pinned contexts, like the migrate contexts need reset after resume since their context image may have been lost. Also the GuC needs to register pinned contexts. Add a list to struct intel_engine_cs where we add all pinned contexts on creation, and traverse that list at resume time to reset the pinned contexts. This fixes the kms_pipe_crc_basic@suspend-read-crc-pipe-a selftest for now, but proper LMEM backup / restore is needed for full suspend functionality. However, note that even with full LMEM backup / restore it may be desirable to keep the reset since backing up the migrate context images must happen using memcpy() after the migrate context has become inactive, and for performance- and other reasons we want to avoid memcpy() from LMEM. Also traverse the list at guc_init_lrc_mapping() calling guc_kernel_context_pin() for the pinned contexts, like is already done for the kernel context. v2: - Don't reset the contexts on each __engine_unpark() but rather at resume time (Chris Wilson). v3: - Reset contexts in the engine sanitize callback. (Chris Wilson) Cc: Tvrtko Ursulin Cc: Matthew Auld Cc: Maarten Lankhorst Cc: Brost Matthew Cc: Chris Wilson Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210922062527.865433-6-thomas.hellstrom@linux.intel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_context_types.h | 8 +++++++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 ++++ drivers/gpu/drm/i915/gt/intel_engine_pm.c | 23 +++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_engine_pm.h | 2 ++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 7 ++++++ .../drm/i915/gt/intel_execlists_submission.c | 2 ++ .../gpu/drm/i915/gt/intel_ring_submission.c | 3 +++ drivers/gpu/drm/i915/gt/mock_engine.c | 2 ++ .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 12 +++++++--- 9 files changed, 60 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e54351a170e2cb..a63631ea0ec471 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -152,6 +152,14 @@ struct intel_context { /** sseu: Control eu/slice partitioning */ struct intel_sseu sseu; + /** + * pinned_contexts_link: List link for the engine's pinned contexts. + * This is only used if this is a perma-pinned kernel context and + * the list is assumed to only be manipulated during driver load + * or unload time so no mutex protection currently. + */ + struct list_head pinned_contexts_link; + u8 wa_bb_page; /* if set, page num reserved for context workarounds */ struct { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 0d9105a31d84ec..eb99441e0ada08 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -320,6 +320,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES); + INIT_LIST_HEAD(&engine->pinned_contexts_list); engine->id = id; engine->legacy_idx = INVALID_ENGINE; engine->mask = BIT(id); @@ -875,6 +876,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine, return ERR_PTR(err); } + list_add_tail(&ce->pinned_contexts_link, &engine->pinned_contexts_list); + /* * Give our perma-pinned kernel timelines a separate lockdep class, * so that we can use them from within the normal user timelines @@ -897,6 +900,7 @@ void intel_engine_destroy_pinned_context(struct intel_context *ce) list_del(&ce->timeline->engine_link); mutex_unlock(&hwsp->vm->mutex); + list_del(&ce->pinned_contexts_link); intel_context_unpin(ce); intel_context_put(ce); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 1f07ac4e0672a6..dacd6277373592 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -298,6 +298,29 @@ void intel_engine_init__pm(struct intel_engine_cs *engine) intel_engine_init_heartbeat(engine); } +/** + * intel_engine_reset_pinned_contexts - Reset the pinned contexts of + * an engine. + * @engine: The engine whose pinned contexts we want to reset. + * + * Typically the pinned context LMEM images lose or get their content + * corrupted on suspend. This function resets their images. + */ +void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + + list_for_each_entry(ce, &engine->pinned_contexts_list, + pinned_contexts_link) { + /* kernel context gets reset at __engine_unpark() */ + if (ce == engine->kernel_context) + continue; + + dbg_poison_ce(ce); + ce->ops->reset(ce); + } +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_engine_pm.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index 70ea46d6cfb00c..8520c595f5e189 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -69,4 +69,6 @@ intel_engine_create_kernel_request(struct intel_engine_cs *engine) void intel_engine_init__pm(struct intel_engine_cs *engine); +void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine); + #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index ed91bcff20eb59..adc44c9fac6de7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -304,6 +304,13 @@ struct intel_engine_cs { struct intel_context *kernel_context; /* pinned */ + /** + * pinned_contexts_list: List of pinned contexts. This list is only + * assumed to be manipulated during driver load- or unload time and + * does therefore not have any additional protection. + */ + struct list_head pinned_contexts_list; + intel_engine_mask_t saturated; /* submitting semaphores too late? */ struct { diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index cafb0608ffb469..416f5e0657f07b 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2787,6 +2787,8 @@ static void execlists_sanitize(struct intel_engine_cs *engine) /* And scrub the dirty cachelines for the HWSP */ clflush_cache_range(engine->status_page.addr, PAGE_SIZE); + + intel_engine_reset_pinned_contexts(engine); } static void enable_error_interrupt(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 2958e2fae3800e..6f2f6ba873974e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -17,6 +17,7 @@ #include "intel_ring.h" #include "shmem_utils.h" #include "intel_engine_heartbeat.h" +#include "intel_engine_pm.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch. @@ -292,6 +293,8 @@ static void xcs_sanitize(struct intel_engine_cs *engine) /* And scrub the dirty cachelines for the HWSP */ clflush_cache_range(engine->status_page.addr, PAGE_SIZE); + + intel_engine_reset_pinned_contexts(engine); } static void reset_prepare(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 2c1af030310c0d..8b89215afe46b1 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -376,6 +376,8 @@ int mock_engine_init(struct intel_engine_cs *engine) { struct intel_context *ce; + INIT_LIST_HEAD(&engine->pinned_contexts_list); + engine->sched_engine = i915_sched_engine_create(ENGINE_MOCK); if (!engine->sched_engine) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 93c9de8f43e8ef..6e09a1cca37b40 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2347,6 +2347,8 @@ static void guc_sanitize(struct intel_engine_cs *engine) /* And scrub the dirty cachelines for the HWSP */ clflush_cache_range(engine->status_page.addr, PAGE_SIZE); + + intel_engine_reset_pinned_contexts(engine); } static void setup_hwsp(struct intel_engine_cs *engine) @@ -2422,9 +2424,13 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc) * and even it did this code would be run again. */ - for_each_engine(engine, gt, id) - if (engine->kernel_context) - guc_kernel_context_pin(guc, engine->kernel_context); + for_each_engine(engine, gt, id) { + struct intel_context *ce; + + list_for_each_entry(ce, &engine->pinned_contexts_list, + pinned_contexts_link) + guc_kernel_context_pin(guc, ce); + } } static void guc_release(struct intel_engine_cs *engine) From b33035945b0a6853f8f6f63fb3c3bc9ea869337e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 14 Oct 2021 12:09:38 +0300 Subject: [PATCH 0297/1056] drm/i915: Replace the unconditional clflush with drm_clflush_virt_range() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ef7ec41f17cbc0861891ccc0634d06a0c8dcbf09 ] Not all machines have clflush, so don't go assuming they do. Not really sure why the clflush is even here since hwsp is supposed to get snooped I thought. Although in my case we're talking about a i830 machine where render/blitter snooping is definitely busted. But it might work for the hswp perhaps. Haven't really reverse engineered that one fully. Cc: stable@vger.kernel.org Cc: Chris Wilson Cc: Mika Kuoppala Fixes: b436a5f8b6c8 ("drm/i915/gt: Track all timelines created using the HWSP") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20211014090941.12159-2-ville.syrjala@linux.intel.com Reviewed-by: Dave Airlie Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 6f2f6ba873974e..02e18e70c78ea0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -292,7 +292,7 @@ static void xcs_sanitize(struct intel_engine_cs *engine) sanitize_hwsp(engine); /* And scrub the dirty cachelines for the HWSP */ - clflush_cache_range(engine->status_page.addr, PAGE_SIZE); + drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); intel_engine_reset_pinned_contexts(engine); } From 006d00d826fb8ccc82db2168e949467fddafd944 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 15 Oct 2021 13:58:40 -0500 Subject: [PATCH 0298/1056] PCI/portdrv: Rename pm_iter() to pcie_port_device_iter() [ Upstream commit 3134689f98f9e09004a4727370adc46e7635b4be ] Rename pm_iter() to pcie_port_device_iter() and make it visible outside CONFIG_PM and portdrv_core.c so it can be used for pciehp slot reset recovery. [bhelgaas: split into its own patch] Link: https://lore.kernel.org/linux-pci/08c046b0-c9f2-3489-eeef-7e7aca435bb9@gmail.com/ Link: https://lore.kernel.org/r/251f4edcc04c14f873ff1c967bc686169cd07d2d.1627638184.git.lukas@wunner.de Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/pcie/portdrv.h | 1 + drivers/pci/pcie/portdrv_core.c | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 2ff5724b8f13f0..6126ee4676a7fa 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -110,6 +110,7 @@ void pcie_port_service_unregister(struct pcie_port_service_driver *new); extern struct bus_type pcie_port_bus_type; int pcie_port_device_register(struct pci_dev *dev); +int pcie_port_device_iter(struct device *dev, void *data); #ifdef CONFIG_PM int pcie_port_device_suspend(struct device *dev); int pcie_port_device_resume_noirq(struct device *dev); diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 3ee63968deaa58..604feeb84ee402 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -367,24 +367,24 @@ int pcie_port_device_register(struct pci_dev *dev) return status; } -#ifdef CONFIG_PM -typedef int (*pcie_pm_callback_t)(struct pcie_device *); +typedef int (*pcie_callback_t)(struct pcie_device *); -static int pm_iter(struct device *dev, void *data) +int pcie_port_device_iter(struct device *dev, void *data) { struct pcie_port_service_driver *service_driver; size_t offset = *(size_t *)data; - pcie_pm_callback_t cb; + pcie_callback_t cb; if ((dev->bus == &pcie_port_bus_type) && dev->driver) { service_driver = to_service_driver(dev->driver); - cb = *(pcie_pm_callback_t *)((void *)service_driver + offset); + cb = *(pcie_callback_t *)((void *)service_driver + offset); if (cb) return cb(to_pcie_device(dev)); } return 0; } +#ifdef CONFIG_PM /** * pcie_port_device_suspend - suspend port services associated with a PCIe port * @dev: PCI Express port to handle @@ -392,13 +392,13 @@ static int pm_iter(struct device *dev, void *data) int pcie_port_device_suspend(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, suspend); - return device_for_each_child(dev, &off, pm_iter); + return device_for_each_child(dev, &off, pcie_port_device_iter); } int pcie_port_device_resume_noirq(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, resume_noirq); - return device_for_each_child(dev, &off, pm_iter); + return device_for_each_child(dev, &off, pcie_port_device_iter); } /** @@ -408,7 +408,7 @@ int pcie_port_device_resume_noirq(struct device *dev) int pcie_port_device_resume(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, resume); - return device_for_each_child(dev, &off, pm_iter); + return device_for_each_child(dev, &off, pcie_port_device_iter); } /** @@ -418,7 +418,7 @@ int pcie_port_device_resume(struct device *dev) int pcie_port_device_runtime_suspend(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, runtime_suspend); - return device_for_each_child(dev, &off, pm_iter); + return device_for_each_child(dev, &off, pcie_port_device_iter); } /** @@ -428,7 +428,7 @@ int pcie_port_device_runtime_suspend(struct device *dev) int pcie_port_device_runtime_resume(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, runtime_resume); - return device_for_each_child(dev, &off, pm_iter); + return device_for_each_child(dev, &off, pcie_port_device_iter); } #endif /* PM */ From e1716b0ff925fcf5b2fdbd49356105e72ff60966 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 31 Jul 2021 14:39:01 +0200 Subject: [PATCH 0299/1056] PCI: pciehp: Ignore Link Down/Up caused by error-induced Hot Reset [ Upstream commit ea401499e943c307e6d44af6c2b4e068643e7884 ] Stuart Hayes reports that an error handled by DPC at a Root Port results in pciehp gratuitously bringing down a subordinate hotplug port: RP -- UP -- DP -- UP -- DP (hotplug) -- EP pciehp brings the slot down because the Link to the Endpoint goes down. That is caused by a Hot Reset being propagated as a result of DPC. Per PCIe Base Spec 5.0, section 6.6.1 "Conventional Reset": For a Switch, the following must cause a hot reset to be sent on all Downstream Ports: [...] * The Data Link Layer of the Upstream Port reporting DL_Down status. In Switches that support Link speeds greater than 5.0 GT/s, the Upstream Port must direct the LTSSM of each Downstream Port to the Hot Reset state, but not hold the LTSSMs in that state. This permits each Downstream Port to begin Link training immediately after its hot reset completes. This behavior is recommended for all Switches. * Receiving a hot reset on the Upstream Port. Once DPC recovers, pcie_do_recovery() walks down the hierarchy and invokes pcie_portdrv_slot_reset() to restore each port's config space. At that point, a hotplug interrupt is signaled per PCIe Base Spec r5.0, section 6.7.3.4 "Software Notification of Hot-Plug Events": If the Port is enabled for edge-triggered interrupt signaling using MSI or MSI-X, an interrupt message must be sent every time the logical AND of the following conditions transitions from FALSE to TRUE: [...] * The Hot-Plug Interrupt Enable bit in the Slot Control register is set to 1b. * At least one hot-plug event status bit in the Slot Status register and its associated enable bit in the Slot Control register are both set to 1b. Prevent pciehp from gratuitously bringing down the slot by clearing the error-induced Data Link Layer State Changed event before restoring config space. Afterwards, check whether the link has unexpectedly failed to retrain and synthesize a DLLSC event if so. Allow each pcie_port_service_driver (one of them being pciehp) to define a slot_reset callback and re-use the existing pm_iter() function to iterate over the callbacks. Thereby, the Endpoint driver remains bound throughout error recovery and may restore the device to working state. Surprise removal during error recovery is detected through a Presence Detect Changed event. The hotplug port is expected to not signal that event as a result of a Hot Reset. The issue isn't DPC-specific, it also occurs when an error is handled by AER through aer_root_reset(). So while the issue was noticed only now, it's been around since 2006 when AER support was first introduced. [bhelgaas: drop PCI_ERROR_RECOVERY Kconfig, split pm_iter() rename to preparatory patch] Link: https://lore.kernel.org/linux-pci/08c046b0-c9f2-3489-eeef-7e7aca435bb9@gmail.com/ Fixes: 6c2b374d7485 ("PCI-Express AER implemetation: AER core and aerdriver") Link: https://lore.kernel.org/r/251f4edcc04c14f873ff1c967bc686169cd07d2d.1627638184.git.lukas@wunner.de Reported-by: Stuart Hayes Tested-by: Stuart Hayes Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v2.6.19+: ba952824e6c1: PCI/portdrv: Report reset for frozen channel Cc: Keith Busch Signed-off-by: Sasha Levin --- drivers/pci/hotplug/pciehp.h | 2 ++ drivers/pci/hotplug/pciehp_core.c | 2 ++ drivers/pci/hotplug/pciehp_hpc.c | 26 ++++++++++++++++++++++++++ drivers/pci/pcie/portdrv.h | 2 ++ drivers/pci/pcie/portdrv_pci.c | 3 +++ 5 files changed, 35 insertions(+) diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 10d7e7e1b55309..e0a614acee059b 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -192,6 +192,8 @@ int pciehp_get_attention_status(struct hotplug_slot *hotplug_slot, u8 *status); int pciehp_set_raw_indicator_status(struct hotplug_slot *h_slot, u8 status); int pciehp_get_raw_indicator_status(struct hotplug_slot *h_slot, u8 *status); +int pciehp_slot_reset(struct pcie_device *dev); + static inline const char *slot_name(struct controller *ctrl) { return hotplug_slot_name(&ctrl->hotplug_slot); diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index e7fe4b42f0394e..4042d87d539dd4 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -351,6 +351,8 @@ static struct pcie_port_service_driver hpdriver_portdrv = { .runtime_suspend = pciehp_runtime_suspend, .runtime_resume = pciehp_runtime_resume, #endif /* PM */ + + .slot_reset = pciehp_slot_reset, }; int __init pcie_hp_init(void) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 8bedbc77fe95ff..60098a701e83ad 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -865,6 +865,32 @@ void pcie_disable_interrupt(struct controller *ctrl) pcie_write_cmd(ctrl, 0, mask); } +/** + * pciehp_slot_reset() - ignore link event caused by error-induced hot reset + * @dev: PCI Express port service device + * + * Called from pcie_portdrv_slot_reset() after AER or DPC initiated a reset + * further up in the hierarchy to recover from an error. The reset was + * propagated down to this hotplug port. Ignore the resulting link flap. + * If the link failed to retrain successfully, synthesize the ignored event. + * Surprise removal during reset is detected through Presence Detect Changed. + */ +int pciehp_slot_reset(struct pcie_device *dev) +{ + struct controller *ctrl = get_service_data(dev); + + if (ctrl->state != ON_STATE) + return 0; + + pcie_capability_write_word(dev->port, PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_DLLSC); + + if (!pciehp_check_link_active(ctrl)) + pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC); + + return 0; +} + /* * pciehp has a 1:1 bus:slot relationship so we ultimately want a secondary * bus reset of the bridge, but at the same time we want to ensure that it is diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 6126ee4676a7fa..41fe1ffd590782 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -85,6 +85,8 @@ struct pcie_port_service_driver { int (*runtime_suspend)(struct pcie_device *dev); int (*runtime_resume)(struct pcie_device *dev); + int (*slot_reset)(struct pcie_device *dev); + /* Device driver may resume normal operations */ void (*error_resume)(struct pci_dev *dev); diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index c7ff1eea225abe..1af74c3d9d5db6 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -160,6 +160,9 @@ static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev) { + size_t off = offsetof(struct pcie_port_service_driver, slot_reset); + device_for_each_child(&dev->dev, &off, pcie_port_device_iter); + pci_restore_state(dev); pci_save_state(dev); return PCI_ERS_RESULT_RECOVERED; From a4ac45aff8d38c64104aec21c6529747d94ae75a Mon Sep 17 00:00:00 2001 From: Sean Young Date: Tue, 14 Sep 2021 16:57:46 +0200 Subject: [PATCH 0300/1056] media: ir_toy: prevent device from hanging during transmit [ Upstream commit 4114978dcd24e72415276bba60ff4ff355970bbc ] If the IR Toy is receiving IR while a transmit is done, it may end up hanging. We can prevent this from happening by re-entering sample mode just before issuing the transmit command. Link: https://github.com/bengtmartensson/HarcHardware/discussions/25 Cc: stable@vger.kernel.org [mchehab: renamed: s/STATE_RESET/STATE_COMMAND_NO_RESP/ ] Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/rc/ir_toy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c index 7f394277478b32..53ae19fa103abe 100644 --- a/drivers/media/rc/ir_toy.c +++ b/drivers/media/rc/ir_toy.c @@ -310,7 +310,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) buf[i] = cpu_to_be16(v); } - buf[count] = cpu_to_be16(0xffff); + buf[count] = 0xffff; irtoy->tx_buf = buf; irtoy->tx_len = size; From 16b7cb2803bf088ed08e026f70fa9ac04f3c9800 Mon Sep 17 00:00:00 2001 From: Andrew Gabbasov Date: Wed, 22 Sep 2021 13:48:30 -0500 Subject: [PATCH 0301/1056] memory: renesas-rpc-if: Avoid unaligned bus access for HyperFlash [ Upstream commit 1869023e24c0de73a160a424dac4621cefd628ae ] HyperFlash devices in Renesas SoCs use 2-bytes addressing, according to HW manual paragraph 62.3.3 (which officially describes Serial Flash access, but seems to be applicable to HyperFlash too). And 1-byte bus read operations to 2-bytes unaligned addresses in external address space read mode work incorrectly (returns the other byte from the same word). Function memcpy_fromio(), used by the driver to read data from the bus, in ARM64 architecture (to which Renesas cores belong) uses 8-bytes bus accesses for appropriate aligned addresses, and 1-bytes accesses for other addresses. This results in incorrect data read from HyperFlash in unaligned cases. This issue can be reproduced using something like the following commands (where mtd1 is a parition on Hyperflash storage, defined properly in a device tree): [Correct fragment, read from Hyperflash] root@rcar-gen3:~# dd if=/dev/mtd1 of=/tmp/zz bs=32 count=1 root@rcar-gen3:~# hexdump -C /tmp/zz 00000000 f4 03 00 aa f5 03 01 aa f6 03 02 aa f7 03 03 aa |................| 00000010 00 00 80 d2 40 20 18 d5 00 06 81 d2 a0 18 a6 f2 |....@ ..........| 00000020 [Incorrect read of the same fragment: see the difference at offsets 8-11] root@rcar-gen3:~# dd if=/dev/mtd1 of=/tmp/zz bs=12 count=1 root@rcar-gen3:~# hexdump -C /tmp/zz 00000000 f4 03 00 aa f5 03 01 aa 03 03 aa aa |............| 0000000c Fix this issue by creating a local replacement of the copying function, that performs only properly aligned bus accesses, and is used for reading from HyperFlash. Fixes: ca7d8b980b67f ("memory: add Renesas RPC-IF driver") Cc: Signed-off-by: Andrew Gabbasov Link: https://lore.kernel.org/r/20210922184830.29147-1-andrew_gabbasov@mentor.com Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- drivers/memory/renesas-rpc-if.c | 48 +++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/drivers/memory/renesas-rpc-if.c b/drivers/memory/renesas-rpc-if.c index 3a416705f61cb7..c77b23b68a9316 100644 --- a/drivers/memory/renesas-rpc-if.c +++ b/drivers/memory/renesas-rpc-if.c @@ -199,7 +199,6 @@ static int rpcif_reg_read(void *context, unsigned int reg, unsigned int *val) *val = readl(rpc->base + reg); return 0; - } static int rpcif_reg_write(void *context, unsigned int reg, unsigned int val) @@ -577,6 +576,48 @@ int rpcif_manual_xfer(struct rpcif *rpc) } EXPORT_SYMBOL(rpcif_manual_xfer); +static void memcpy_fromio_readw(void *to, + const void __iomem *from, + size_t count) +{ + const int maxw = (IS_ENABLED(CONFIG_64BIT)) ? 8 : 4; + u8 buf[2]; + + if (count && ((unsigned long)from & 1)) { + *(u16 *)buf = __raw_readw((void __iomem *)((unsigned long)from & ~1)); + *(u8 *)to = buf[1]; + from++; + to++; + count--; + } + while (count >= 2 && !IS_ALIGNED((unsigned long)from, maxw)) { + *(u16 *)to = __raw_readw(from); + from += 2; + to += 2; + count -= 2; + } + while (count >= maxw) { +#ifdef CONFIG_64BIT + *(u64 *)to = __raw_readq(from); +#else + *(u32 *)to = __raw_readl(from); +#endif + from += maxw; + to += maxw; + count -= maxw; + } + while (count >= 2) { + *(u16 *)to = __raw_readw(from); + from += 2; + to += 2; + count -= 2; + } + if (count) { + *(u16 *)buf = __raw_readw(from); + *(u8 *)to = buf[0]; + } +} + ssize_t rpcif_dirmap_read(struct rpcif *rpc, u64 offs, size_t len, void *buf) { loff_t from = offs & (RPCIF_DIRMAP_SIZE - 1); @@ -598,7 +639,10 @@ ssize_t rpcif_dirmap_read(struct rpcif *rpc, u64 offs, size_t len, void *buf) regmap_write(rpc->regmap, RPCIF_DRDMCR, rpc->dummy); regmap_write(rpc->regmap, RPCIF_DRDRENR, rpc->ddr); - memcpy_fromio(buf, rpc->dirmap + from, len); + if (rpc->bus_size == 2) + memcpy_fromio_readw(buf, rpc->dirmap + from, len); + else + memcpy_fromio(buf, rpc->dirmap + from, len); pm_runtime_put(rpc->dev); From 8a29aec244ae88a54655d5c4886d7c43b6af401f Mon Sep 17 00:00:00 2001 From: Seevalamuthu Mariappan Date: Wed, 17 Nov 2021 09:39:41 +0200 Subject: [PATCH 0302/1056] ath11k: add hw_param for wakeup_mhi [ Upstream commit 081e2d6476e30399433b509684d5da4d1844e430 ] Wakeup mhi is needed before pci_read/write only for QCA6390 and WCN6855. Since wakeup & release mhi is enabled for all hardwares, below mhi assert is seen in QCN9074 when doing 'rmmod ath11k_pci': Kernel panic - not syncing: dev_wake != 0 CPU: 2 PID: 13535 Comm: procd Not tainted 4.4.60 #1 Hardware name: Generic DT based system [<80316dac>] (unwind_backtrace) from [<80313700>] (show_stack+0x10/0x14) [<80313700>] (show_stack) from [<805135dc>] (dump_stack+0x7c/0x9c) [<805135dc>] (dump_stack) from [<8032136c>] (panic+0x84/0x1f8) [<8032136c>] (panic) from [<80549b24>] (mhi_pm_disable_transition+0x3b8/0x5b8) [<80549b24>] (mhi_pm_disable_transition) from [<80549ddc>] (mhi_power_down+0xb8/0x100) [<80549ddc>] (mhi_power_down) from [<7f5242b0>] (ath11k_mhi_op_status_cb+0x284/0x3ac [ath11k_pci]) [E][__mhi_device_get_sync] Did not enter M0 state, cur_state:RESET pm_state:SHUTDOWN Process [E][__mhi_device_get_sync] Did not enter M0 state, cur_state:RESET pm_state:SHUTDOWN Process [E][__mhi_device_get_sync] Did not enter M0 state, cur_state:RESET pm_state:SHUTDOWN Process [<7f5242b0>] (ath11k_mhi_op_status_cb [ath11k_pci]) from [<7f524878>] (ath11k_mhi_stop+0x10/0x20 [ath11k_pci]) [<7f524878>] (ath11k_mhi_stop [ath11k_pci]) from [<7f525b94>] (ath11k_pci_power_down+0x54/0x90 [ath11k_pci]) [<7f525b94>] (ath11k_pci_power_down [ath11k_pci]) from [<8056b2a8>] (pci_device_shutdown+0x30/0x44) [<8056b2a8>] (pci_device_shutdown) from [<805cfa0c>] (device_shutdown+0x124/0x174) [<805cfa0c>] (device_shutdown) from [<8033aaa4>] (kernel_restart+0xc/0x50) [<8033aaa4>] (kernel_restart) from [<8033ada8>] (SyS_reboot+0x178/0x1ec) [<8033ada8>] (SyS_reboot) from [<80301b80>] (ret_fast_syscall+0x0/0x34) Hence, disable wakeup/release mhi using hw_param for other hardwares. Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.5.0.1-01060-QCAHKSWPL_SILICONZ-1 Fixes: a05bd8513335 ("ath11k: read and write registers below unwindowed address") Signed-off-by: Seevalamuthu Mariappan Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1636702019-26142-1-git-send-email-quic_seevalam@quicinc.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath11k/core.c | 5 +++++ drivers/net/wireless/ath/ath11k/hw.h | 1 + drivers/net/wireless/ath/ath11k/pci.c | 12 ++++++++---- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 7dcf6b13f79491..48b4151e13a3e0 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -71,6 +71,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .supports_suspend = false, .hal_desc_sz = sizeof(struct hal_rx_desc_ipq8074), .fix_l1ss = true, + .wakeup_mhi = false, }, { .hw_rev = ATH11K_HW_IPQ6018_HW10, @@ -112,6 +113,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .supports_suspend = false, .hal_desc_sz = sizeof(struct hal_rx_desc_ipq8074), .fix_l1ss = true, + .wakeup_mhi = false, }, { .name = "qca6390 hw2.0", @@ -152,6 +154,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .supports_suspend = true, .hal_desc_sz = sizeof(struct hal_rx_desc_ipq8074), .fix_l1ss = true, + .wakeup_mhi = true, }, { .name = "qcn9074 hw1.0", @@ -190,6 +193,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .supports_suspend = false, .hal_desc_sz = sizeof(struct hal_rx_desc_qcn9074), .fix_l1ss = true, + .wakeup_mhi = false, }, { .name = "wcn6855 hw2.0", @@ -230,6 +234,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .supports_suspend = true, .hal_desc_sz = sizeof(struct hal_rx_desc_wcn6855), .fix_l1ss = false, + .wakeup_mhi = true, }, }; diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h index 62f5978b30055a..4fe051625edfba 100644 --- a/drivers/net/wireless/ath/ath11k/hw.h +++ b/drivers/net/wireless/ath/ath11k/hw.h @@ -163,6 +163,7 @@ struct ath11k_hw_params { bool supports_suspend; u32 hal_desc_sz; bool fix_l1ss; + bool wakeup_mhi; }; struct ath11k_hw_ops { diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c index 353a2d669fcd5d..7d0be9388f893e 100644 --- a/drivers/net/wireless/ath/ath11k/pci.c +++ b/drivers/net/wireless/ath/ath11k/pci.c @@ -182,7 +182,8 @@ void ath11k_pci_write32(struct ath11k_base *ab, u32 offset, u32 value) /* for offset beyond BAR + 4K - 32, may * need to wakeup MHI to access. */ - if (test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) && + if (ab->hw_params.wakeup_mhi && + test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) && offset >= ACCESS_ALWAYS_OFF) mhi_device_get_sync(ab_pci->mhi_ctrl->mhi_dev); @@ -206,7 +207,8 @@ void ath11k_pci_write32(struct ath11k_base *ab, u32 offset, u32 value) } } - if (test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) && + if (ab->hw_params.wakeup_mhi && + test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) && offset >= ACCESS_ALWAYS_OFF) mhi_device_put(ab_pci->mhi_ctrl->mhi_dev); } @@ -219,7 +221,8 @@ u32 ath11k_pci_read32(struct ath11k_base *ab, u32 offset) /* for offset beyond BAR + 4K - 32, may * need to wakeup MHI to access. */ - if (test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) && + if (ab->hw_params.wakeup_mhi && + test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) && offset >= ACCESS_ALWAYS_OFF) mhi_device_get_sync(ab_pci->mhi_ctrl->mhi_dev); @@ -243,7 +246,8 @@ u32 ath11k_pci_read32(struct ath11k_base *ab, u32 offset) } } - if (test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) && + if (ab->hw_params.wakeup_mhi && + test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) && offset >= ACCESS_ALWAYS_OFF) mhi_device_put(ab_pci->mhi_ctrl->mhi_dev); From a9a101842420fccd73f9831aa375551449c084be Mon Sep 17 00:00:00 2001 From: Shai Malin Date: Mon, 13 Sep 2021 10:50:24 +0300 Subject: [PATCH 0303/1056] qed: Improve the stack space of filter_config() [ Upstream commit f55e36d5ab76c3097ff36ecea60b91c6b0d80fc8 ] As it was reported and discussed in: https://lore.kernel.org/lkml/CAHk-=whF9F89vsfH8E9TGc0tZA-yhzi2Di8wOtquNB5vRkFX5w@mail.gmail.com/ This patch improves the stack space of qede_config_rx_mode() by splitting filter_config() to 3 functions and removing the union qed_filter_type_params. Reported-by: Naresh Kamboju Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 23 ++------- .../net/ethernet/qlogic/qede/qede_filter.c | 47 ++++++++----------- include/linux/qed/qed_eth_if.h | 21 ++++----- 3 files changed, 30 insertions(+), 61 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index dfaf10edfabfd4..ba8c7a31cce1fb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2763,25 +2763,6 @@ static int qed_configure_filter_mcast(struct qed_dev *cdev, return qed_filter_mcast_cmd(cdev, &mcast, QED_SPQ_MODE_CB, NULL); } -static int qed_configure_filter(struct qed_dev *cdev, - struct qed_filter_params *params) -{ - enum qed_filter_rx_mode_type accept_flags; - - switch (params->type) { - case QED_FILTER_TYPE_UCAST: - return qed_configure_filter_ucast(cdev, ¶ms->filter.ucast); - case QED_FILTER_TYPE_MCAST: - return qed_configure_filter_mcast(cdev, ¶ms->filter.mcast); - case QED_FILTER_TYPE_RX_MODE: - accept_flags = params->filter.accept_flags; - return qed_configure_filter_rx_mode(cdev, accept_flags); - default: - DP_NOTICE(cdev, "Unknown filter type %d\n", (int)params->type); - return -EINVAL; - } -} - static int qed_configure_arfs_searcher(struct qed_dev *cdev, enum qed_filter_config_mode mode) { @@ -2904,7 +2885,9 @@ static const struct qed_eth_ops qed_eth_ops_pass = { .q_rx_stop = &qed_stop_rxq, .q_tx_start = &qed_start_txq, .q_tx_stop = &qed_stop_txq, - .filter_config = &qed_configure_filter, + .filter_config_rx_mode = &qed_configure_filter_rx_mode, + .filter_config_ucast = &qed_configure_filter_ucast, + .filter_config_mcast = &qed_configure_filter_mcast, .fastpath_stop = &qed_fastpath_stop, .eth_cqe_completion = &qed_fp_cqe_completion, .get_vport_stats = &qed_get_vport_stats, diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index a2e4dfb5cb44e7..f99b085b56a54c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -619,30 +619,28 @@ static int qede_set_ucast_rx_mac(struct qede_dev *edev, enum qed_filter_xcast_params_type opcode, unsigned char mac[ETH_ALEN]) { - struct qed_filter_params filter_cmd; + struct qed_filter_ucast_params ucast; - memset(&filter_cmd, 0, sizeof(filter_cmd)); - filter_cmd.type = QED_FILTER_TYPE_UCAST; - filter_cmd.filter.ucast.type = opcode; - filter_cmd.filter.ucast.mac_valid = 1; - ether_addr_copy(filter_cmd.filter.ucast.mac, mac); + memset(&ucast, 0, sizeof(ucast)); + ucast.type = opcode; + ucast.mac_valid = 1; + ether_addr_copy(ucast.mac, mac); - return edev->ops->filter_config(edev->cdev, &filter_cmd); + return edev->ops->filter_config_ucast(edev->cdev, &ucast); } static int qede_set_ucast_rx_vlan(struct qede_dev *edev, enum qed_filter_xcast_params_type opcode, u16 vid) { - struct qed_filter_params filter_cmd; + struct qed_filter_ucast_params ucast; - memset(&filter_cmd, 0, sizeof(filter_cmd)); - filter_cmd.type = QED_FILTER_TYPE_UCAST; - filter_cmd.filter.ucast.type = opcode; - filter_cmd.filter.ucast.vlan_valid = 1; - filter_cmd.filter.ucast.vlan = vid; + memset(&ucast, 0, sizeof(ucast)); + ucast.type = opcode; + ucast.vlan_valid = 1; + ucast.vlan = vid; - return edev->ops->filter_config(edev->cdev, &filter_cmd); + return edev->ops->filter_config_ucast(edev->cdev, &ucast); } static int qede_config_accept_any_vlan(struct qede_dev *edev, bool action) @@ -1057,18 +1055,17 @@ static int qede_set_mcast_rx_mac(struct qede_dev *edev, enum qed_filter_xcast_params_type opcode, unsigned char *mac, int num_macs) { - struct qed_filter_params filter_cmd; + struct qed_filter_mcast_params mcast; int i; - memset(&filter_cmd, 0, sizeof(filter_cmd)); - filter_cmd.type = QED_FILTER_TYPE_MCAST; - filter_cmd.filter.mcast.type = opcode; - filter_cmd.filter.mcast.num = num_macs; + memset(&mcast, 0, sizeof(mcast)); + mcast.type = opcode; + mcast.num = num_macs; for (i = 0; i < num_macs; i++, mac += ETH_ALEN) - ether_addr_copy(filter_cmd.filter.mcast.mac[i], mac); + ether_addr_copy(mcast.mac[i], mac); - return edev->ops->filter_config(edev->cdev, &filter_cmd); + return edev->ops->filter_config_mcast(edev->cdev, &mcast); } int qede_set_mac_addr(struct net_device *ndev, void *p) @@ -1194,7 +1191,6 @@ void qede_config_rx_mode(struct net_device *ndev) { enum qed_filter_rx_mode_type accept_flags; struct qede_dev *edev = netdev_priv(ndev); - struct qed_filter_params rx_mode; unsigned char *uc_macs, *temp; struct netdev_hw_addr *ha; int rc, uc_count; @@ -1220,10 +1216,6 @@ void qede_config_rx_mode(struct net_device *ndev) netif_addr_unlock_bh(ndev); - /* Configure the struct for the Rx mode */ - memset(&rx_mode, 0, sizeof(struct qed_filter_params)); - rx_mode.type = QED_FILTER_TYPE_RX_MODE; - /* Remove all previous unicast secondary macs and multicast macs * (configure / leave the primary mac) */ @@ -1271,8 +1263,7 @@ void qede_config_rx_mode(struct net_device *ndev) qede_config_accept_any_vlan(edev, false); } - rx_mode.filter.accept_flags = accept_flags; - edev->ops->filter_config(edev->cdev, &rx_mode); + edev->ops->filter_config_rx_mode(edev->cdev, accept_flags); out: kfree(uc_macs); } diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 812a4d75116338..4df0bf0a0864e3 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -145,12 +145,6 @@ struct qed_filter_mcast_params { unsigned char mac[64][ETH_ALEN]; }; -union qed_filter_type_params { - enum qed_filter_rx_mode_type accept_flags; - struct qed_filter_ucast_params ucast; - struct qed_filter_mcast_params mcast; -}; - enum qed_filter_type { QED_FILTER_TYPE_UCAST, QED_FILTER_TYPE_MCAST, @@ -158,11 +152,6 @@ enum qed_filter_type { QED_MAX_FILTER_TYPES, }; -struct qed_filter_params { - enum qed_filter_type type; - union qed_filter_type_params filter; -}; - struct qed_tunn_params { u16 vxlan_port; u8 update_vxlan_port; @@ -314,8 +303,14 @@ struct qed_eth_ops { int (*q_tx_stop)(struct qed_dev *cdev, u8 rss_id, void *handle); - int (*filter_config)(struct qed_dev *cdev, - struct qed_filter_params *params); + int (*filter_config_rx_mode)(struct qed_dev *cdev, + enum qed_filter_rx_mode_type type); + + int (*filter_config_ucast)(struct qed_dev *cdev, + struct qed_filter_ucast_params *params); + + int (*filter_config_mcast)(struct qed_dev *cdev, + struct qed_filter_mcast_params *params); int (*fastpath_stop)(struct qed_dev *cdev); From 789382ce73596e548c4843082f69db149493fdaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barnab=C3=A1s=20P=C5=91cze?= Date: Sat, 4 Sep 2021 17:56:32 +0000 Subject: [PATCH 0304/1056] platform/x86: wmi: introduce helper to convert driver to WMI driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e7b2e33449e22fdbaa0247d96f31543affe6163d ] Introduce a helper function which wraps the appropriate `container_of()` macro invocation to convert a `struct device_driver` to `struct wmi_driver`. Signed-off-by: Barnabás Pőcze Link: https://lore.kernel.org/r/20210904175450.156801-27-pobrn@protonmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/wmi.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 1b65bb61ce8887..9aeb1a009097ce 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -676,6 +676,11 @@ static struct wmi_device *dev_to_wdev(struct device *dev) return container_of(dev, struct wmi_device, dev); } +static inline struct wmi_driver *drv_to_wdrv(struct device_driver *drv) +{ + return container_of(drv, struct wmi_driver, driver); +} + /* * sysfs interface */ @@ -794,8 +799,7 @@ static void wmi_dev_release(struct device *dev) static int wmi_dev_match(struct device *dev, struct device_driver *driver) { - struct wmi_driver *wmi_driver = - container_of(driver, struct wmi_driver, driver); + struct wmi_driver *wmi_driver = drv_to_wdrv(driver); struct wmi_block *wblock = dev_to_wblock(dev); const struct wmi_device_id *id = wmi_driver->id_table; @@ -892,8 +896,7 @@ static long wmi_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } /* let the driver do any filtering and do the call */ - wdriver = container_of(wblock->dev.dev.driver, - struct wmi_driver, driver); + wdriver = drv_to_wdrv(wblock->dev.dev.driver); if (!try_module_get(wdriver->driver.owner)) { ret = -EBUSY; goto out_ioctl; @@ -926,8 +929,7 @@ static const struct file_operations wmi_fops = { static int wmi_dev_probe(struct device *dev) { struct wmi_block *wblock = dev_to_wblock(dev); - struct wmi_driver *wdriver = - container_of(dev->driver, struct wmi_driver, driver); + struct wmi_driver *wdriver = drv_to_wdrv(dev->driver); int ret = 0; char *buf; @@ -990,8 +992,7 @@ static int wmi_dev_probe(struct device *dev) static void wmi_dev_remove(struct device *dev) { struct wmi_block *wblock = dev_to_wblock(dev); - struct wmi_driver *wdriver = - container_of(dev->driver, struct wmi_driver, driver); + struct wmi_driver *wdriver = drv_to_wdrv(dev->driver); if (wdriver->filter_callback) { misc_deregister(&wblock->char_dev); @@ -1296,15 +1297,12 @@ static void acpi_wmi_notify_handler(acpi_handle handle, u32 event, /* If a driver is bound, then notify the driver. */ if (wblock->dev.dev.driver) { - struct wmi_driver *driver; + struct wmi_driver *driver = drv_to_wdrv(wblock->dev.dev.driver); struct acpi_object_list input; union acpi_object params[1]; struct acpi_buffer evdata = { ACPI_ALLOCATE_BUFFER, NULL }; acpi_status status; - driver = container_of(wblock->dev.dev.driver, - struct wmi_driver, driver); - input.count = 1; input.pointer = params; params[0].type = ACPI_TYPE_INTEGER; From 4b53562319894fad2b13883562af316c6efaa48a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 28 Nov 2021 20:00:27 +0100 Subject: [PATCH 0305/1056] platform/x86: wmi: Replace read_takes_no_args with a flags field [ Upstream commit a90b38c58667142ecff2521481ed44286d46b140 ] Replace the wmi_block.read_takes_no_args bool field with an unsigned long flags field, used together with test_bit() and friends. This is a preparation patch for fixing a driver->notify() vs ->probe() race, which requires atomic flag handling. Reviewed-by: Andy Shevchenko Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211128190031.405620-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/wmi.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 9aeb1a009097ce..a00b72ace6d2b9 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -51,6 +51,10 @@ struct guid_block { u8 flags; }; +enum { /* wmi_block flags */ + WMI_READ_TAKES_NO_ARGS, +}; + struct wmi_block { struct wmi_device dev; struct list_head list; @@ -61,8 +65,7 @@ struct wmi_block { wmi_notify_handler handler; void *handler_data; u64 req_buf_size; - - bool read_takes_no_args; + unsigned long flags; }; @@ -325,7 +328,7 @@ static acpi_status __query_block(struct wmi_block *wblock, u8 instance, wq_params[0].type = ACPI_TYPE_INTEGER; wq_params[0].integer.value = instance; - if (instance == 0 && wblock->read_takes_no_args) + if (instance == 0 && test_bit(WMI_READ_TAKES_NO_ARGS, &wblock->flags)) input.count = 0; /* @@ -1087,7 +1090,7 @@ static int wmi_create_device(struct device *wmi_bus_dev, * laptops, WQxx may not be a method at all.) */ if (info->type != ACPI_TYPE_METHOD || info->param_count == 0) - wblock->read_takes_no_args = true; + set_bit(WMI_READ_TAKES_NO_ARGS, &wblock->flags); kfree(info); From 9846b9e4bba796564f90f7c4555f429a5395bee6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 28 Nov 2021 20:00:28 +0100 Subject: [PATCH 0306/1056] platform/x86: wmi: Fix driver->notify() vs ->probe() race [ Upstream commit 9918878676a5f9e99b98679f04b9e6c0f5426b0a ] The driver core sets struct device->driver before calling out to the bus' probe() method, this leaves a window where an ACPI notify may happen on the WMI object before the driver's probe() method has completed running, causing e.g. the driver's notify() callback to get called with drvdata not yet being set leading to a NULL pointer deref. At a check for this to the WMI core, ensuring that the notify() callback is not called before the driver is ready. Fixes: 1686f5444546 ("platform/x86: wmi: Incorporate acpi_install_notify_handler") Reviewed-by: Andy Shevchenko Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211128190031.405620-2-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/wmi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index a00b72ace6d2b9..c4f917d45b51d6 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -53,6 +53,7 @@ struct guid_block { enum { /* wmi_block flags */ WMI_READ_TAKES_NO_ARGS, + WMI_PROBED, }; struct wmi_block { @@ -980,6 +981,7 @@ static int wmi_dev_probe(struct device *dev) } } + set_bit(WMI_PROBED, &wblock->flags); return 0; probe_misc_failure: @@ -997,6 +999,8 @@ static void wmi_dev_remove(struct device *dev) struct wmi_block *wblock = dev_to_wblock(dev); struct wmi_driver *wdriver = drv_to_wdrv(dev->driver); + clear_bit(WMI_PROBED, &wblock->flags); + if (wdriver->filter_callback) { misc_deregister(&wblock->char_dev); kfree(wblock->char_dev.name); @@ -1299,7 +1303,7 @@ static void acpi_wmi_notify_handler(acpi_handle handle, u32 event, return; /* If a driver is bound, then notify the driver. */ - if (wblock->dev.dev.driver) { + if (test_bit(WMI_PROBED, &wblock->flags) && wblock->dev.dev.driver) { struct wmi_driver *driver = drv_to_wdrv(wblock->dev.dev.driver); struct acpi_object_list input; union acpi_object params[1]; From 09aee8375b0cc8e1cb54fd61e1b3764b99d96804 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 9 Aug 2021 10:38:03 +0200 Subject: [PATCH 0307/1056] mt76: mt7921: get rid of mt7921_mac_set_beacon_filter [ Upstream commit b30363102a4122f6eed37927b64a2c7ac70b8859 ] Remove mt7921_mac_set_beacon_filter routine since it is no longer used. Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- .../net/wireless/mediatek/mt76/mt7921/mac.c | 28 ------------------- .../wireless/mediatek/mt76/mt7921/mt7921.h | 3 -- 2 files changed, 31 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c index bef8d4a76ed90e..426e7a32bdc867 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c @@ -1573,34 +1573,6 @@ void mt7921_pm_power_save_work(struct work_struct *work) queue_delayed_work(dev->mt76.wq, &dev->pm.ps_work, delta); } -int mt7921_mac_set_beacon_filter(struct mt7921_phy *phy, - struct ieee80211_vif *vif, - bool enable) -{ - struct mt7921_dev *dev = phy->dev; - bool ext_phy = phy != &dev->phy; - int err; - - if (!dev->pm.enable) - return -EOPNOTSUPP; - - err = mt7921_mcu_set_bss_pm(dev, vif, enable); - if (err) - return err; - - if (enable) { - vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER; - mt76_set(dev, MT_WF_RFCR(ext_phy), - MT_WF_RFCR_DROP_OTHER_BEACON); - } else { - vif->driver_flags &= ~IEEE80211_VIF_BEACON_FILTER; - mt76_clear(dev, MT_WF_RFCR(ext_phy), - MT_WF_RFCR_DROP_OTHER_BEACON); - } - - return 0; -} - void mt7921_coredump_work(struct work_struct *work) { struct mt7921_dev *dev; diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h index 2d8bd6bfc820ac..1aafd8723b3a56 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h @@ -381,9 +381,6 @@ int mt7921_mcu_fw_pmctrl(struct mt7921_dev *dev); void mt7921_pm_wake_work(struct work_struct *work); void mt7921_pm_power_save_work(struct work_struct *work); bool mt7921_wait_for_mcu_init(struct mt7921_dev *dev); -int mt7921_mac_set_beacon_filter(struct mt7921_phy *phy, - struct ieee80211_vif *vif, - bool enable); void mt7921_pm_interface_iter(void *priv, u8 *mac, struct ieee80211_vif *vif); void mt7921_coredump_work(struct work_struct *work); int mt7921_wfsys_reset(struct mt7921_dev *dev); From 281a194f5a67b3b04f7fb1d2179ac4af358a1439 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 9 Aug 2021 12:37:22 +0200 Subject: [PATCH 0308/1056] mt76: mt7921: introduce mt7921_mcu_set_beacon_filter utility routine [ Upstream commit 890809ca1986e63d29dd1591090af67b655ed89c ] Introduce mt7921_mcu_set_beacon_filter utility routine in order to remove duplicated code for hw beacon filtering. Move mt7921_pm_interface_iter in debugfs since it is just used there. Make the following routine static: - mt7921_pm_interface_iter - mt7921_mcu_uni_bss_bcnft - mt7921_mcu_set_bss_pm Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- .../wireless/mediatek/mt76/mt7921/debugfs.c | 20 +++++--- .../net/wireless/mediatek/mt76/mt7921/main.c | 33 +------------ .../net/wireless/mediatek/mt76/mt7921/mcu.c | 47 ++++++++++--------- .../wireless/mediatek/mt76/mt7921/mt7921.h | 8 ++-- 4 files changed, 45 insertions(+), 63 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c index 8d5e261cd10f6c..82fb2585f41387 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c @@ -262,30 +262,38 @@ mt7921_txpwr(struct seq_file *s, void *data) return 0; } +static void +mt7921_pm_interface_iter(void *priv, u8 *mac, struct ieee80211_vif *vif) +{ + struct mt7921_dev *dev = priv; + + mt7921_mcu_set_beacon_filter(dev, vif, dev->pm.enable); +} + static int mt7921_pm_set(void *data, u64 val) { struct mt7921_dev *dev = data; struct mt76_connac_pm *pm = &dev->pm; - struct mt76_phy *mphy = dev->phy.mt76; - - if (val == pm->enable) - return 0; mt7921_mutex_acquire(dev); + if (val == pm->enable) + goto out; + if (!pm->enable) { pm->stats.last_wake_event = jiffies; pm->stats.last_doze_event = jiffies; } pm->enable = val; - ieee80211_iterate_active_interfaces(mphy->hw, + ieee80211_iterate_active_interfaces(mt76_hw(dev), IEEE80211_IFACE_ITER_RESUME_ALL, - mt7921_pm_interface_iter, mphy->priv); + mt7921_pm_interface_iter, dev); mt76_connac_mcu_set_deep_sleep(&dev->mt76, pm->ds_enable); +out: mt7921_mutex_release(dev); return 0; diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 30252f408ddc46..13a7ae3d83516e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -528,36 +528,6 @@ static void mt7921_configure_filter(struct ieee80211_hw *hw, mt7921_mutex_release(dev); } -static int -mt7921_bss_bcnft_apply(struct mt7921_dev *dev, struct ieee80211_vif *vif, - bool assoc) -{ - int ret; - - if (!dev->pm.enable) - return 0; - - if (assoc) { - ret = mt7921_mcu_uni_bss_bcnft(dev, vif, true); - if (ret) - return ret; - - vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER; - mt76_set(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON); - - return 0; - } - - ret = mt7921_mcu_set_bss_pm(dev, vif, false); - if (ret) - return ret; - - vif->driver_flags &= ~IEEE80211_VIF_BEACON_FILTER; - mt76_clear(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON); - - return 0; -} - static void mt7921_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *info, @@ -587,7 +557,8 @@ static void mt7921_bss_info_changed(struct ieee80211_hw *hw, if (changed & BSS_CHANGED_ASSOC) { mt7921_mcu_sta_update(dev, NULL, vif, true, MT76_STA_INFO_STATE_ASSOC); - mt7921_bss_bcnft_apply(dev, vif, info->assoc); + if (dev->pm.enable) + mt7921_mcu_set_beacon_filter(dev, vif, info->assoc); } if (changed & BSS_CHANGED_ARP_FILTER) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c index 4119f8efd896b5..dabc0de2ec65dd 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c @@ -1205,8 +1205,9 @@ int mt7921_mcu_uni_bss_ps(struct mt7921_dev *dev, struct ieee80211_vif *vif) &ps_req, sizeof(ps_req), true); } -int mt7921_mcu_uni_bss_bcnft(struct mt7921_dev *dev, struct ieee80211_vif *vif, - bool enable) +static int +mt7921_mcu_uni_bss_bcnft(struct mt7921_dev *dev, struct ieee80211_vif *vif, + bool enable) { struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv; struct { @@ -1240,8 +1241,9 @@ int mt7921_mcu_uni_bss_bcnft(struct mt7921_dev *dev, struct ieee80211_vif *vif, &bcnft_req, sizeof(bcnft_req), true); } -int mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif, - bool enable) +static int +mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif, + bool enable) { struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv; struct { @@ -1390,31 +1392,34 @@ int mt7921_mcu_fw_pmctrl(struct mt7921_dev *dev) return err; } -void -mt7921_pm_interface_iter(void *priv, u8 *mac, struct ieee80211_vif *vif) +int mt7921_mcu_set_beacon_filter(struct mt7921_dev *dev, + struct ieee80211_vif *vif, + bool enable) { - struct mt7921_phy *phy = priv; - struct mt7921_dev *dev = phy->dev; struct ieee80211_hw *hw = mt76_hw(dev); - int ret; - - if (dev->pm.enable) - ret = mt7921_mcu_uni_bss_bcnft(dev, vif, true); - else - ret = mt7921_mcu_set_bss_pm(dev, vif, false); + int err; - if (ret) - return; + if (enable) { + err = mt7921_mcu_uni_bss_bcnft(dev, vif, true); + if (err) + return err; - if (dev->pm.enable) { vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER; ieee80211_hw_set(hw, CONNECTION_MONITOR); mt76_set(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON); - } else { - vif->driver_flags &= ~IEEE80211_VIF_BEACON_FILTER; - __clear_bit(IEEE80211_HW_CONNECTION_MONITOR, hw->flags); - mt76_clear(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON); + + return 0; } + + err = mt7921_mcu_set_bss_pm(dev, vif, false); + if (err) + return err; + + vif->driver_flags &= ~IEEE80211_VIF_BEACON_FILTER; + __clear_bit(IEEE80211_HW_CONNECTION_MONITOR, hw->flags); + mt76_clear(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON); + + return 0; } int mt7921_get_txpwr_info(struct mt7921_dev *dev, struct mt7921_txpwr *txpwr) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h index 1aafd8723b3a56..32d4f2cab94e2f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h @@ -363,6 +363,9 @@ void mt7921_set_stream_he_caps(struct mt7921_phy *phy); void mt7921_update_channel(struct mt76_phy *mphy); int mt7921_init_debugfs(struct mt7921_dev *dev); +int mt7921_mcu_set_beacon_filter(struct mt7921_dev *dev, + struct ieee80211_vif *vif, + bool enable); int mt7921_mcu_uni_tx_ba(struct mt7921_dev *dev, struct ieee80211_ampdu_params *params, bool enable); @@ -371,17 +374,12 @@ int mt7921_mcu_uni_rx_ba(struct mt7921_dev *dev, bool enable); void mt7921_scan_work(struct work_struct *work); int mt7921_mcu_uni_bss_ps(struct mt7921_dev *dev, struct ieee80211_vif *vif); -int mt7921_mcu_uni_bss_bcnft(struct mt7921_dev *dev, struct ieee80211_vif *vif, - bool enable); -int mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif, - bool enable); int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev); int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev); int mt7921_mcu_fw_pmctrl(struct mt7921_dev *dev); void mt7921_pm_wake_work(struct work_struct *work); void mt7921_pm_power_save_work(struct work_struct *work); bool mt7921_wait_for_mcu_init(struct mt7921_dev *dev); -void mt7921_pm_interface_iter(void *priv, u8 *mac, struct ieee80211_vif *vif); void mt7921_coredump_work(struct work_struct *work); int mt7921_wfsys_reset(struct mt7921_dev *dev); int mt7921_get_txpwr_info(struct mt7921_dev *dev, struct mt7921_txpwr *txpwr); From d3688bfa5af4557e617f5da2373ccf32c999311e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 21 Dec 2021 14:57:09 +0100 Subject: [PATCH 0309/1056] mt76: mt7921: fix a possible race enabling/disabling runtime-pm [ Upstream commit d430dffbe9dd30759f3c64b65bf85b0245c8d8ab ] Fix a possible race enabling/disabling runtime-pm between mt7921_pm_set() and mt7921_poll_rx() since mt7921_pm_wake_work() always schedules rx-napi callback and it will trigger mt7921_pm_power_save_work routine putting chip to in low-power state during mt7921_pm_set processing. Suggested-by: Deren Wu Tested-by: Deren Wu Fixes: 1d8efc741df8 ("mt76: mt7921: introduce Runtime PM support") Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/0f3e075a2033dc05f09dab4059e5be8cbdccc239.1640094847.git.lorenzo@kernel.org Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c | 3 --- drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c | 12 +++++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c index af43bcb5457817..306e9eaea9177e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c @@ -7,9 +7,6 @@ int mt76_connac_pm_wake(struct mt76_phy *phy, struct mt76_connac_pm *pm) { struct mt76_dev *dev = phy->dev; - if (!pm->enable) - return 0; - if (mt76_is_usb(dev)) return 0; diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c index 82fb2585f41387..b9f25599227d25 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c @@ -276,7 +276,7 @@ mt7921_pm_set(void *data, u64 val) struct mt7921_dev *dev = data; struct mt76_connac_pm *pm = &dev->pm; - mt7921_mutex_acquire(dev); + mutex_lock(&dev->mt76.mutex); if (val == pm->enable) goto out; @@ -285,7 +285,11 @@ mt7921_pm_set(void *data, u64 val) pm->stats.last_wake_event = jiffies; pm->stats.last_doze_event = jiffies; } - pm->enable = val; + /* make sure the chip is awake here and ps_work is scheduled + * just at end of the this routine. + */ + pm->enable = false; + mt76_connac_pm_wake(&dev->mphy, pm); ieee80211_iterate_active_interfaces(mt76_hw(dev), IEEE80211_IFACE_ITER_RESUME_ALL, @@ -293,8 +297,10 @@ mt7921_pm_set(void *data, u64 val) mt76_connac_mcu_set_deep_sleep(&dev->mt76, pm->ds_enable); + pm->enable = val; + mt76_connac_power_save_sched(&dev->mphy, pm); out: - mt7921_mutex_release(dev); + mutex_unlock(&dev->mt76.mutex); return 0; } From d53c8fe9ee2919a1b82c91b46ec74bfd9caa4a64 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 5 Nov 2021 18:40:14 -0700 Subject: [PATCH 0310/1056] bpf: Stop caching subprog index in the bpf_pseudo_func insn commit 3990ed4c426652fcd469f8c9dc08156294b36c28 upstream. This patch is to fix an out-of-bound access issue when jit-ing the bpf_pseudo_func insn (i.e. ld_imm64 with src_reg == BPF_PSEUDO_FUNC) In jit_subprog(), it currently reuses the subprog index cached in insn[1].imm. This subprog index is an index into a few array related to subprogs. For example, in jit_subprog(), it is an index to the newly allocated 'struct bpf_prog **func' array. The subprog index was cached in insn[1].imm after add_subprog(). However, this could become outdated (and too big in this case) if some subprogs are completely removed during dead code elimination (in adjust_subprog_starts_after_remove). The cached index in insn[1].imm is not updated accordingly and causing out-of-bound issue in the later jit_subprog(). Unlike bpf_pseudo_'func' insn, the current bpf_pseudo_'call' insn is handling the DCE properly by calling find_subprog(insn->imm) to figure out the index instead of caching the subprog index. The existing bpf_adj_branches() will adjust the insn->imm whenever insn is added or removed. Instead of having two ways handling subprog index, this patch is to make bpf_pseudo_func works more like bpf_pseudo_call. First change is to stop caching the subprog index result in insn[1].imm after add_subprog(). The verification process will use find_subprog(insn->imm) to figure out the subprog index. Second change is in bpf_adj_branches() and have it to adjust the insn->imm for the bpf_pseudo_func insn also whenever insn is added or removed. Third change is in jit_subprog(). Like the bpf_pseudo_call handling, bpf_pseudo_func temporarily stores the find_subprog() result in insn->off. It is fine because the prog's insn has been finalized at this point. insn->off will be reset back to 0 later to avoid confusing the userspace prog dump tool. Fixes: 69c087ba6225 ("bpf: Add bpf_for_each_map_elem() helper") Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211106014014.651018-1-kafai@fb.com Cc: Jon Hunter Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf.h | 6 ++++++ kernel/bpf/core.c | 7 +++++++ kernel/bpf/verifier.c | 37 ++++++++++++++----------------------- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bb766a0b9b51e4..818cd594e9229d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -533,6 +533,12 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) aux->ctx_field_size = size; } +static inline bool bpf_pseudo_func(const struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && + insn->src_reg == BPF_PSEUDO_FUNC; +} + struct bpf_prog_ops { int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 48eb9c329da605..15946c11524e70 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -389,6 +389,13 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old, i = end_new; insn = prog->insnsi + end_old; } + if (bpf_pseudo_func(insn)) { + ret = bpf_adj_delta_to_imm(insn, pos, end_old, + end_new, i, probe_pass); + if (ret) + return ret; + continue; + } code = insn->code; if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) || diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b72ed3ae00d847..346d36c905a90a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -240,12 +240,6 @@ static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn) insn->src_reg == BPF_PSEUDO_KFUNC_CALL; } -static bool bpf_pseudo_func(const struct bpf_insn *insn) -{ - return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && - insn->src_reg == BPF_PSEUDO_FUNC; -} - struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; @@ -1788,16 +1782,10 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env) return -EPERM; } - if (bpf_pseudo_func(insn)) { + if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn)) ret = add_subprog(env, i + insn->imm + 1); - if (ret >= 0) - /* remember subprog */ - insn[1].imm = ret; - } else if (bpf_pseudo_call(insn)) { - ret = add_subprog(env, i + insn->imm + 1); - } else { + else ret = add_kfunc_call(env, insn->imm); - } if (ret < 0) return ret; @@ -9222,7 +9210,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) if (insn->src_reg == BPF_PSEUDO_FUNC) { struct bpf_prog_aux *aux = env->prog->aux; - u32 subprogno = insn[1].imm; + u32 subprogno = find_subprog(env, + env->insn_idx + insn->imm + 1); if (!aux->func_info) { verbose(env, "missing btf func_info\n"); @@ -12392,14 +12381,9 @@ static int jit_subprogs(struct bpf_verifier_env *env) return 0; for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { - if (bpf_pseudo_func(insn)) { - env->insn_aux_data[i].call_imm = insn->imm; - /* subprog is encoded in insn[1].imm */ + if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn)) continue; - } - if (!bpf_pseudo_call(insn)) - continue; /* Upon error here we cannot fall back to interpreter but * need a hard reject of the program. Thus -EFAULT is * propagated in any case. @@ -12420,6 +12404,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) env->insn_aux_data[i].call_imm = insn->imm; /* point imm to __bpf_call_base+1 from JITs point of view */ insn->imm = 1; + if (bpf_pseudo_func(insn)) + /* jit (e.g. x86_64) may emit fewer instructions + * if it learns a u32 imm is the same as a u64 imm. + * Force a non zero here. + */ + insn[1].imm = 1; } err = bpf_prog_alloc_jited_linfo(prog); @@ -12503,7 +12493,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) insn = func[i]->insnsi; for (j = 0; j < func[i]->len; j++, insn++) { if (bpf_pseudo_func(insn)) { - subprog = insn[1].imm; + subprog = insn->off; insn[0].imm = (u32)(long)func[subprog]->bpf_func; insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32; continue; @@ -12555,7 +12545,8 @@ static int jit_subprogs(struct bpf_verifier_env *env) for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { if (bpf_pseudo_func(insn)) { insn[0].imm = env->insn_aux_data[i].call_imm; - insn[1].imm = find_subprog(env, i + insn[0].imm + 1); + insn[1].imm = insn->off; + insn->off = 0; continue; } if (!bpf_pseudo_call(insn)) From feacd73fd8b2ef6fa48655af1e56ab95140bac50 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 31 Dec 2021 23:10:18 +0800 Subject: [PATCH 0311/1056] bpf, arm64: Use emit_addr_mov_i64() for BPF_PSEUDO_FUNC [ Upstream commit e4a41c2c1fa916547e63440c73a51a5eb06247af ] The following error is reported when running "./test_progs -t for_each" under arm64: bpf_jit: multi-func JIT bug 58 != 56 [...] JIT doesn't support bpf-to-bpf calls The root cause is the size of BPF_PSEUDO_FUNC instruction increases from 2 to 3 after the address of called bpf-function is settled and there are two bpf-to-bpf calls in test_pkt_access. The generated instructions are shown below: 0x48: 21 00 C0 D2 movz x1, #0x1, lsl #32 0x4c: 21 00 80 F2 movk x1, #0x1 0x48: E1 3F C0 92 movn x1, #0x1ff, lsl #32 0x4c: 41 FE A2 F2 movk x1, #0x17f2, lsl #16 0x50: 81 70 9F F2 movk x1, #0xfb84 Fixing it by using emit_addr_mov_i64() for BPF_PSEUDO_FUNC, so the size of jited image will not change. Fixes: 69c087ba6225 ("bpf: Add bpf_for_each_map_elem() helper") Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211231151018.3781550-1-houtao1@huawei.com Signed-off-by: Sasha Levin --- arch/arm64/net/bpf_jit_comp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 95439bbe5df8f5..4895b4d7e150f5 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -788,7 +788,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, u64 imm64; imm64 = (u64)insn1.imm << 32 | (u32)imm; - emit_a64_mov_i64(dst, imm64, ctx); + if (bpf_pseudo_func(insn)) + emit_addr_mov_i64(dst, imm64, ctx); + else + emit_a64_mov_i64(dst, imm64, ctx); return 1; } From 49ae6abd617ff8c674bd22202b33ea47d185ed01 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Tue, 12 Oct 2021 18:46:58 +0200 Subject: [PATCH 0312/1056] riscv: defconfig: enable DRM_NOUVEAU [ Upstream commit ffa7a9141bb70702744a312f904b190ca064bdd7 ] Both RADEON and NOUVEAU graphics cards are supported on RISC-V. Enabling the one and not the other does not make sense. As typically at most one of RADEON, NOUVEAU, or VIRTIO GPU support will be needed DRM drivers should be compiled as modules. Signed-off-by: Heinrich Schuchardt Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/configs/defconfig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 4ebc80315f0135..c252fd5706d20d 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -72,9 +72,10 @@ CONFIG_GPIOLIB=y CONFIG_GPIO_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_POWER_RESET=y -CONFIG_DRM=y -CONFIG_DRM_RADEON=y -CONFIG_DRM_VIRTIO_GPU=y +CONFIG_DRM=m +CONFIG_DRM_RADEON=m +CONFIG_DRM_NOUVEAU=m +CONFIG_DRM_VIRTIO_GPU=m CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y From 910349170ac0b9685c82d7d4d0e79b97164dd364 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Fri, 19 Nov 2021 08:44:02 -0800 Subject: [PATCH 0313/1056] RISC-V: defconfigs: Set CONFIG_FB=y, for FB console [ Upstream commit 3d12b634fe8206ea974c6061a3f3eea529ffbc48 ] We have CONFIG_FRAMEBUFFER_CONSOLE=y in the defconfigs, but that depends on CONFIG_FB so it's not actually getting set. I'm assuming most users on real systems want a framebuffer console, so this enables CONFIG_FB to allow that to take effect. Fixes: 33c57c0d3c67 ("RISC-V: Add a basic defconfig") Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/configs/defconfig | 1 + arch/riscv/configs/rv32_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index c252fd5706d20d..f2a2f9c9ed49cb 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -76,6 +76,7 @@ CONFIG_DRM=m CONFIG_DRM_RADEON=m CONFIG_DRM_NOUVEAU=m CONFIG_DRM_VIRTIO_GPU=m +CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index 434ef5b645998e..cdd113e7a2912d 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -71,6 +71,7 @@ CONFIG_POWER_RESET=y CONFIG_DRM=y CONFIG_DRM_RADEON=y CONFIG_DRM_VIRTIO_GPU=y +CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y From f1c36a47a3b409faadb3db1ef6a472dd456efa81 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 12 Aug 2021 09:38:32 +0300 Subject: [PATCH 0314/1056] net/mlx5e: Check action fwd/drop flag exists also for nic flows [ Upstream commit 6b50cf45b6a0e99f3cab848a72ecca8da56b7460 ] The driver should add offloaded rules with either a fwd or drop action. The check existed in parsing fdb flows but not when parsing nic flows. Move the test into actions_match_supported() which is called for checking nic flows and fdb flows. Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3aa8d0b83d1083..fe52db59112149 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3305,6 +3305,12 @@ static bool actions_match_supported(struct mlx5e_priv *priv, ct_flow = flow_flag_test(flow, CT) && !ct_clear; actions = flow->attr->action; + if (!(actions & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action"); + return false; + } + if (mlx5e_is_eswitch_flow(flow)) { if (flow->attr->esw_attr->split_count && ct_flow && !MLX5_CAP_GEN(flow->attr->esw_attr->in_mdev, reg_c_preserve)) { @@ -4207,13 +4213,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } - if (!(attr->action & - (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { - NL_SET_ERR_MSG_MOD(extack, - "Rule must have at least one forward/drop action"); - return -EOPNOTSUPP; - } - if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { NL_SET_ERR_MSG_MOD(extack, "current firmware doesn't support split rule for port mirroring"); From aa944fefb39647a4118d890e7f18cce525f68fc4 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 15 Aug 2021 12:53:13 +0300 Subject: [PATCH 0315/1056] net/mlx5e: Split actions_match_supported() into a sub function [ Upstream commit 9c1d3511a2c2fd30c991a20c670991ece4ef27c1 ] There will probably be more checks, some for nic flows, some for fdb flows and some are shared checks. Split it for fdb and nic to avoid the function getting too big. Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 65 +++++++++++-------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fe52db59112149..3c6c7801f13168 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3291,19 +3291,41 @@ static bool modify_header_match_supported(struct mlx5e_priv *priv, return true; } -static bool actions_match_supported(struct mlx5e_priv *priv, - struct flow_action *flow_action, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) +static bool +actions_match_supported_fdb(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + bool ct_flow, ct_clear; + + ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; + ct_flow = flow_flag_test(flow, CT) && !ct_clear; + + if (flow->attr->esw_attr->split_count && ct_flow && + !MLX5_CAP_GEN(flow->attr->esw_attr->in_mdev, reg_c_preserve)) { + /* All registers used by ct are cleared when using + * split rules. + */ + NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct"); + return false; + } + + return true; +} + +static bool +actions_match_supported(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) { - bool ct_flow = false, ct_clear = false; - u32 actions; + u32 actions = flow->attr->action; + bool ct_flow, ct_clear; - ct_clear = flow->attr->ct_attr.ct_action & - TCA_CT_ACT_CLEAR; + ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; ct_flow = flow_flag_test(flow, CT) && !ct_clear; - actions = flow->attr->action; if (!(actions & (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { @@ -3311,23 +3333,14 @@ static bool actions_match_supported(struct mlx5e_priv *priv, return false; } - if (mlx5e_is_eswitch_flow(flow)) { - if (flow->attr->esw_attr->split_count && ct_flow && - !MLX5_CAP_GEN(flow->attr->esw_attr->in_mdev, reg_c_preserve)) { - /* All registers used by ct are cleared when using - * split rules. - */ - NL_SET_ERR_MSG_MOD(extack, - "Can't offload mirroring with action ct"); - return false; - } - } + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + !modify_header_match_supported(priv, &parse_attr->spec, flow_action, + actions, ct_flow, ct_clear, extack)) + return false; - if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - return modify_header_match_supported(priv, &parse_attr->spec, - flow_action, actions, - ct_flow, ct_clear, - extack); + if (mlx5e_is_eswitch_flow(flow) && + !actions_match_supported_fdb(priv, parse_attr, flow, extack)) + return false; return true; } From 6d1ac7f882dabb7b67cb71ba15c66fdfd7160399 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 4 Jan 2022 10:38:02 +0200 Subject: [PATCH 0316/1056] net/mlx5e: TC, Reject rules with drop and modify hdr action [ Upstream commit a2446bc77a16cefd27de712d28af2396d6287593 ] This kind of action is not supported by firmware and generates a syndrome. kernel: mlx5_core 0000:08:00.0: mlx5_cmd_check:777:(pid 102063): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x8708c3) Fixes: d7e75a325cb2 ("net/mlx5e: Add offloading of E-Switch TC pedit (header re-write) actions") Signed-off-by: Roi Dayan Reviewed-by: Oz Shlomo Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3c6c7801f13168..eb0d9082ccc5bd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3333,6 +3333,12 @@ actions_match_supported(struct mlx5e_priv *priv, return false; } + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); + return false; + } + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions, ct_flow, ct_clear, extack)) From 301ebfa578e35c7ca1b529044d038121a0b2b61b Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 17 Jan 2022 15:00:30 +0200 Subject: [PATCH 0317/1056] net/mlx5e: TC, Reject rules with forward and drop actions [ Upstream commit 5623ef8a118838aae65363750dfafcba734dc8cb ] Such rules are redundant but allowed and passed to the driver. The driver does not support offloading such rules so return an error. Fixes: 03a9d11e6eeb ("net/mlx5e: Add TC drop and mirred/redirect action parsing for SRIOV offloads") Signed-off-by: Roi Dayan Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index eb0d9082ccc5bd..843c8435387f36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3333,6 +3333,12 @@ actions_match_supported(struct mlx5e_priv *priv, return false; } + if (!(~actions & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action"); + return false; + } + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); From 4b72179e53bda916363d513bd1b3c0091c721df7 Mon Sep 17 00:00:00 2001 From: Derek Fang Date: Tue, 9 Nov 2021 17:54:49 +0800 Subject: [PATCH 0318/1056] ASoC: rt5682: Avoid the unexpected IRQ event during going to suspend [ Upstream commit a3774a2a6544a7a4a85186e768afc07044aa507f ] When the system suspends, the codec driver will set SAR to power saving mode if a headset is plugged in. There is a chance to generate an unexpected IRQ, and leads to issues after resuming such as noise from OMTP type headsets. Signed-off-by: Derek Fang Link: https://lore.kernel.org/r/20211109095450.12950-1-derek.fang@realtek.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5682.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 6ad3159eceb98a..949b5638db298a 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -48,6 +48,7 @@ static const struct reg_sequence patch_list[] = { {RT5682_SAR_IL_CMD_6, 0x0110}, {RT5682_CHARGE_PUMP_1, 0x0210}, {RT5682_HP_LOGIC_CTRL_2, 0x0007}, + {RT5682_SAR_IL_CMD_2, 0xac00}, }; void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev) @@ -2969,9 +2970,6 @@ static int rt5682_suspend(struct snd_soc_component *component) cancel_delayed_work_sync(&rt5682->jack_detect_work); cancel_delayed_work_sync(&rt5682->jd_check_work); if (rt5682->hs_jack && rt5682->jack_type == SND_JACK_HEADSET) { - snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, - RT5682_MB1_PATH_MASK | RT5682_MB2_PATH_MASK, - RT5682_CTRL_MB1_REG | RT5682_CTRL_MB2_REG); val = snd_soc_component_read(component, RT5682_CBJ_CTRL_2) & RT5682_JACK_TYPE_MASK; @@ -2993,10 +2991,15 @@ static int rt5682_suspend(struct snd_soc_component *component) /* enter SAR ADC power saving mode */ snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1, RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK | - RT5682_SAR_BUTDET_RST_MASK | RT5682_SAR_SEL_MB1_MB2_MASK, 0); + RT5682_SAR_SEL_MB1_MB2_MASK, 0); + usleep_range(5000, 6000); + snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, + RT5682_MB1_PATH_MASK | RT5682_MB2_PATH_MASK, + RT5682_CTRL_MB1_REG | RT5682_CTRL_MB2_REG); + usleep_range(10000, 12000); snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1, - RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK | RT5682_SAR_BUTDET_RST_MASK, - RT5682_SAR_BUTT_DET_EN | RT5682_SAR_BUTDET_POW_SAV | RT5682_SAR_BUTDET_RST_NORMAL); + RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK, + RT5682_SAR_BUTT_DET_EN | RT5682_SAR_BUTDET_POW_SAV); } regcache_cache_only(rt5682->regmap, true); From f536e0df64b8c3095b3575ea114d07b0d0e8627c Mon Sep 17 00:00:00 2001 From: Derek Fang Date: Tue, 9 Nov 2021 17:54:50 +0800 Subject: [PATCH 0319/1056] ASoC: rt5682: Re-detect the combo jack after resuming [ Upstream commit 2cd9b0ef82d936623d789bb3fbb6fcf52c500367 ] Sometimes, end-users change the jack type under suspending, so it needs to re-detect the combo jack type after resuming to avoid any unexpected behaviors. Signed-off-by: Derek Fang Link: https://lore.kernel.org/r/20211109095450.12950-2-derek.fang@realtek.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5682-i2c.c | 1 + sound/soc/codecs/rt5682.c | 23 ++++++++++++++++++++--- sound/soc/codecs/rt5682.h | 1 + 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/rt5682-i2c.c b/sound/soc/codecs/rt5682-i2c.c index b9d5d7a0975b30..b17c14b8b36e3b 100644 --- a/sound/soc/codecs/rt5682-i2c.c +++ b/sound/soc/codecs/rt5682-i2c.c @@ -196,6 +196,7 @@ static int rt5682_i2c_probe(struct i2c_client *i2c, } mutex_init(&rt5682->calibrate_mutex); + mutex_init(&rt5682->jdet_mutex); rt5682_calibrate(rt5682); rt5682_apply_patch_list(rt5682, &i2c->dev); diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 949b5638db298a..d470b918976835 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -49,6 +49,7 @@ static const struct reg_sequence patch_list[] = { {RT5682_CHARGE_PUMP_1, 0x0210}, {RT5682_HP_LOGIC_CTRL_2, 0x0007}, {RT5682_SAR_IL_CMD_2, 0xac00}, + {RT5682_CBJ_CTRL_7, 0x0104}, }; void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev) @@ -943,6 +944,10 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) snd_soc_component_update_bits(component, RT5682_HP_CHARGE_PUMP_1, RT5682_OSW_L_MASK | RT5682_OSW_R_MASK, 0); + rt5682_enable_push_button_irq(component, false); + snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, + RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW); + usleep_range(55000, 60000); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_HIGH); @@ -1099,6 +1104,7 @@ void rt5682_jack_detect_handler(struct work_struct *work) return; } + mutex_lock(&rt5682->jdet_mutex); mutex_lock(&rt5682->calibrate_mutex); val = snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL) @@ -1172,6 +1178,7 @@ void rt5682_jack_detect_handler(struct work_struct *work) } mutex_unlock(&rt5682->calibrate_mutex); + mutex_unlock(&rt5682->jdet_mutex); } EXPORT_SYMBOL_GPL(rt5682_jack_detect_handler); @@ -1521,6 +1528,7 @@ static int rt5682_hp_event(struct snd_soc_dapm_widget *w, { struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component); switch (event) { case SND_SOC_DAPM_PRE_PMU: @@ -1532,12 +1540,17 @@ static int rt5682_hp_event(struct snd_soc_dapm_widget *w, RT5682_DEPOP_1, 0x60, 0x60); snd_soc_component_update_bits(component, RT5682_DAC_ADC_DIG_VOL1, 0x00c0, 0x0080); + + mutex_lock(&rt5682->jdet_mutex); + snd_soc_component_update_bits(component, RT5682_HP_CTRL_2, RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN, RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN); usleep_range(5000, 10000); snd_soc_component_update_bits(component, RT5682_CHARGE_PUMP_1, RT5682_CP_SW_SIZE_MASK, RT5682_CP_SW_SIZE_L); + + mutex_unlock(&rt5682->jdet_mutex); break; case SND_SOC_DAPM_POST_PMD: @@ -2969,7 +2982,7 @@ static int rt5682_suspend(struct snd_soc_component *component) cancel_delayed_work_sync(&rt5682->jack_detect_work); cancel_delayed_work_sync(&rt5682->jd_check_work); - if (rt5682->hs_jack && rt5682->jack_type == SND_JACK_HEADSET) { + if (rt5682->hs_jack && (rt5682->jack_type & SND_JACK_HEADSET) == SND_JACK_HEADSET) { val = snd_soc_component_read(component, RT5682_CBJ_CTRL_2) & RT5682_JACK_TYPE_MASK; @@ -3000,6 +3013,8 @@ static int rt5682_suspend(struct snd_soc_component *component) snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1, RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK, RT5682_SAR_BUTT_DET_EN | RT5682_SAR_BUTDET_POW_SAV); + snd_soc_component_update_bits(component, RT5682_HP_CHARGE_PUMP_1, + RT5682_OSW_L_MASK | RT5682_OSW_R_MASK, 0); } regcache_cache_only(rt5682->regmap, true); @@ -3017,10 +3032,11 @@ static int rt5682_resume(struct snd_soc_component *component) regcache_cache_only(rt5682->regmap, false); regcache_sync(rt5682->regmap); - if (rt5682->hs_jack && rt5682->jack_type == SND_JACK_HEADSET) { + if (rt5682->hs_jack && (rt5682->jack_type & SND_JACK_HEADSET) == SND_JACK_HEADSET) { snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1, RT5682_SAR_BUTDET_MODE_MASK | RT5682_SAR_SEL_MB1_MB2_MASK, RT5682_SAR_BUTDET_POW_NORM | RT5682_SAR_SEL_MB1_MB2_AUTO); + usleep_range(5000, 6000); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_MB1_PATH_MASK | RT5682_MB2_PATH_MASK, RT5682_CTRL_MB1_FSM | RT5682_CTRL_MB2_FSM); @@ -3028,8 +3044,9 @@ static int rt5682_resume(struct snd_soc_component *component) RT5682_PWR_CBJ, RT5682_PWR_CBJ); } + rt5682->jack_type = 0; mod_delayed_work(system_power_efficient_wq, - &rt5682->jack_detect_work, msecs_to_jiffies(250)); + &rt5682->jack_detect_work, msecs_to_jiffies(0)); return 0; } diff --git a/sound/soc/codecs/rt5682.h b/sound/soc/codecs/rt5682.h index 8e3244a62c1606..f866d207d1bd69 100644 --- a/sound/soc/codecs/rt5682.h +++ b/sound/soc/codecs/rt5682.h @@ -1462,6 +1462,7 @@ struct rt5682_priv { int jack_type; int irq_work_delay_time; + struct mutex jdet_mutex; }; extern const char *rt5682_supply_names[RT5682_NUM_SUPPLIES]; From 25ca15fed4bb83cf3ed1bd44dd18c790c3eba453 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 26 Jan 2022 12:03:25 +0200 Subject: [PATCH 0320/1056] ASoC: rt5682: Fix deadlock on resume [ Upstream commit 4045daf0fa87846a27f56329fddad2deeb5ca354 ] On resume from suspend the following chain of events can happen: A rt5682_resume() -> mod_delayed_work() for jack_detect_work B DAPM sequence starts ( DAPM is locked now) A1. rt5682_jack_detect_handler() scheduled - Takes both jdet_mutex and calibrate_mutex - Calls in to rt5682_headset_detect() which tries to take DAPM lock, it starts to wait for it as B path took it already. B1. DAPM sequence reaches the "HP Amp", rt5682_hp_event() tries to take the jdet_mutex, but it is locked in A1, so it waits. Deadlock. To solve the deadlock, drop the jdet_mutex, use the jack_detect_work to do the jack removal handling, move the dapm lock up one level to protect the most of the rt5682_jack_detect_handler(), but not the jack reporting as it might trigger a DAPM sequence. The rt5682_headset_detect() can be changed to static as well. Fixes: 8deb34a90f063 ("ASoC: rt5682: fix the wrong jack type detected") Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20220126100325.16513-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5682-i2c.c | 15 ++++----------- sound/soc/codecs/rt5682.c | 24 ++++++++---------------- sound/soc/codecs/rt5682.h | 2 -- 3 files changed, 12 insertions(+), 29 deletions(-) diff --git a/sound/soc/codecs/rt5682-i2c.c b/sound/soc/codecs/rt5682-i2c.c index b17c14b8b36e3b..74a1fee071dd6e 100644 --- a/sound/soc/codecs/rt5682-i2c.c +++ b/sound/soc/codecs/rt5682-i2c.c @@ -59,18 +59,12 @@ static void rt5682_jd_check_handler(struct work_struct *work) struct rt5682_priv *rt5682 = container_of(work, struct rt5682_priv, jd_check_work.work); - if (snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL) - & RT5682_JDH_RS_MASK) { + if (snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL) & RT5682_JDH_RS_MASK) /* jack out */ - rt5682->jack_type = rt5682_headset_detect(rt5682->component, 0); - - snd_soc_jack_report(rt5682->hs_jack, rt5682->jack_type, - SND_JACK_HEADSET | - SND_JACK_BTN_0 | SND_JACK_BTN_1 | - SND_JACK_BTN_2 | SND_JACK_BTN_3); - } else { + mod_delayed_work(system_power_efficient_wq, + &rt5682->jack_detect_work, 0); + else schedule_delayed_work(&rt5682->jd_check_work, 500); - } } static irqreturn_t rt5682_irq(int irq, void *data) @@ -196,7 +190,6 @@ static int rt5682_i2c_probe(struct i2c_client *i2c, } mutex_init(&rt5682->calibrate_mutex); - mutex_init(&rt5682->jdet_mutex); rt5682_calibrate(rt5682); rt5682_apply_patch_list(rt5682, &i2c->dev); diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index d470b918976835..1cd59e166ccece 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -922,15 +922,13 @@ static void rt5682_enable_push_button_irq(struct snd_soc_component *component, * * Returns detect status. */ -int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) +static int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) { struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component); struct snd_soc_dapm_context *dapm = &component->dapm; unsigned int val, count; if (jack_insert) { - snd_soc_dapm_mutex_lock(dapm); - snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_VREF2 | RT5682_PWR_MB, RT5682_PWR_VREF2 | RT5682_PWR_MB); @@ -981,8 +979,6 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) snd_soc_component_update_bits(component, RT5682_MICBIAS_2, RT5682_PWR_CLK25M_MASK | RT5682_PWR_CLK1M_MASK, RT5682_PWR_CLK25M_PU | RT5682_PWR_CLK1M_PU); - - snd_soc_dapm_mutex_unlock(dapm); } else { rt5682_enable_push_button_irq(component, false); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, @@ -1011,7 +1007,6 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) dev_dbg(component->dev, "jack_type = %d\n", rt5682->jack_type); return rt5682->jack_type; } -EXPORT_SYMBOL_GPL(rt5682_headset_detect); static int rt5682_set_jack_detect(struct snd_soc_component *component, struct snd_soc_jack *hs_jack, void *data) @@ -1094,6 +1089,7 @@ void rt5682_jack_detect_handler(struct work_struct *work) { struct rt5682_priv *rt5682 = container_of(work, struct rt5682_priv, jack_detect_work.work); + struct snd_soc_dapm_context *dapm; int val, btn_type; if (!rt5682->component || !rt5682->component->card || @@ -1104,7 +1100,9 @@ void rt5682_jack_detect_handler(struct work_struct *work) return; } - mutex_lock(&rt5682->jdet_mutex); + dapm = snd_soc_component_get_dapm(rt5682->component); + + snd_soc_dapm_mutex_lock(dapm); mutex_lock(&rt5682->calibrate_mutex); val = snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL) @@ -1164,6 +1162,9 @@ void rt5682_jack_detect_handler(struct work_struct *work) rt5682->irq_work_delay_time = 50; } + mutex_unlock(&rt5682->calibrate_mutex); + snd_soc_dapm_mutex_unlock(dapm); + snd_soc_jack_report(rt5682->hs_jack, rt5682->jack_type, SND_JACK_HEADSET | SND_JACK_BTN_0 | SND_JACK_BTN_1 | @@ -1176,9 +1177,6 @@ void rt5682_jack_detect_handler(struct work_struct *work) else cancel_delayed_work_sync(&rt5682->jd_check_work); } - - mutex_unlock(&rt5682->calibrate_mutex); - mutex_unlock(&rt5682->jdet_mutex); } EXPORT_SYMBOL_GPL(rt5682_jack_detect_handler); @@ -1528,7 +1526,6 @@ static int rt5682_hp_event(struct snd_soc_dapm_widget *w, { struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); - struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component); switch (event) { case SND_SOC_DAPM_PRE_PMU: @@ -1540,17 +1537,12 @@ static int rt5682_hp_event(struct snd_soc_dapm_widget *w, RT5682_DEPOP_1, 0x60, 0x60); snd_soc_component_update_bits(component, RT5682_DAC_ADC_DIG_VOL1, 0x00c0, 0x0080); - - mutex_lock(&rt5682->jdet_mutex); - snd_soc_component_update_bits(component, RT5682_HP_CTRL_2, RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN, RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN); usleep_range(5000, 10000); snd_soc_component_update_bits(component, RT5682_CHARGE_PUMP_1, RT5682_CP_SW_SIZE_MASK, RT5682_CP_SW_SIZE_L); - - mutex_unlock(&rt5682->jdet_mutex); break; case SND_SOC_DAPM_POST_PMD: diff --git a/sound/soc/codecs/rt5682.h b/sound/soc/codecs/rt5682.h index f866d207d1bd69..539a9fe26294a8 100644 --- a/sound/soc/codecs/rt5682.h +++ b/sound/soc/codecs/rt5682.h @@ -1462,7 +1462,6 @@ struct rt5682_priv { int jack_type; int irq_work_delay_time; - struct mutex jdet_mutex; }; extern const char *rt5682_supply_names[RT5682_NUM_SUPPLIES]; @@ -1472,7 +1471,6 @@ int rt5682_sel_asrc_clk_src(struct snd_soc_component *component, void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev); -int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert); void rt5682_jack_detect_handler(struct work_struct *work); bool rt5682_volatile_register(struct device *dev, unsigned int reg); From 0d9bd7e6ac3af627df314b0800b778b7912576b7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 28 Oct 2021 21:47:55 +0200 Subject: [PATCH 0321/1056] netfilter: nf_tables: convert pktinfo->tprot_set to flags field [ Upstream commit b5bdc6f9c24db9a0adf8bd00c0e935b184654f00 ] Generalize boolean field to store more flags on the pktinfo structure. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_tables.h | 8 ++++++-- include/net/netfilter/nf_tables_ipv4.h | 7 ++++--- include/net/netfilter/nf_tables_ipv6.h | 6 +++--- net/netfilter/nf_tables_core.c | 2 +- net/netfilter/nf_tables_trace.c | 4 ++-- net/netfilter/nft_meta.c | 2 +- net/netfilter/nft_payload.c | 4 ++-- 7 files changed, 19 insertions(+), 14 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2af1c2c64128e9..d005f87691daa9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -21,10 +21,14 @@ struct module; #define NFT_JUMP_STACK_SIZE 16 +enum { + NFT_PKTINFO_L4PROTO = (1 << 0), +}; + struct nft_pktinfo { struct sk_buff *skb; const struct nf_hook_state *state; - bool tprot_set; + u8 flags; u8 tprot; u16 fragoff; unsigned int thoff; @@ -75,7 +79,7 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt) { - pkt->tprot_set = false; + pkt->flags = 0; pkt->tprot = 0; pkt->thoff = 0; pkt->fragoff = 0; diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index eb4c094cd54d23..c4a6147b0ef8ce 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -10,7 +10,7 @@ static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt) struct iphdr *ip; ip = ip_hdr(pkt->skb); - pkt->tprot_set = true; + pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = ip->protocol; pkt->thoff = ip_hdrlen(pkt->skb); pkt->fragoff = ntohs(ip->frag_off) & IP_OFFSET; @@ -36,7 +36,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) else if (len < thoff) return -1; - pkt->tprot_set = true; + pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = iph->protocol; pkt->thoff = thoff; pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; @@ -71,7 +71,7 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt) goto inhdr_error; } - pkt->tprot_set = true; + pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = iph->protocol; pkt->thoff = thoff; pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; @@ -82,4 +82,5 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt) __IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INHDRERRORS); return -1; } + #endif diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 7595e02b00ba08..ec7eaeaf4f04c2 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -18,7 +18,7 @@ static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt) return; } - pkt->tprot_set = true; + pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = protohdr; pkt->thoff = thoff; pkt->fragoff = frag_off; @@ -50,7 +50,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) if (protohdr < 0) return -1; - pkt->tprot_set = true; + pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = protohdr; pkt->thoff = thoff; pkt->fragoff = frag_off; @@ -96,7 +96,7 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt) if (protohdr < 0) goto inhdr_error; - pkt->tprot_set = true; + pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = protohdr; pkt->thoff = thoff; pkt->fragoff = frag_off; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 907e848dbc1782..d4d8f613af5125 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -79,7 +79,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) ptr = skb_network_header(skb); else { - if (!pkt->tprot_set) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) return false; ptr = skb_network_header(skb) + nft_thoff(pkt); } diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index e4fe2f0780eb6f..84a7dea46efaed 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -113,13 +113,13 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, int off = skb_network_offset(skb); unsigned int len, nh_end; - nh_end = pkt->tprot_set ? nft_thoff(pkt) : skb->len; + nh_end = pkt->flags & NFT_PKTINFO_L4PROTO ? nft_thoff(pkt) : skb->len; len = min_t(unsigned int, nh_end - skb_network_offset(skb), NFT_TRACETYPE_NETWORK_HSIZE); if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len)) return -1; - if (pkt->tprot_set) { + if (pkt->flags & NFT_PKTINFO_L4PROTO) { len = min_t(unsigned int, skb->len - nft_thoff(pkt), NFT_TRACETYPE_TRANSPORT_HSIZE); if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 44d9b38e5f90c5..14412f69a34e8c 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -321,7 +321,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, nft_reg_store8(dest, nft_pf(pkt)); break; case NFT_META_L4PROTO: - if (!pkt->tprot_set) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) goto err; nft_reg_store8(dest, pkt->tprot); break; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 132875cd7fff22..c3ccfff54a3575 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -108,7 +108,7 @@ void nft_payload_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: - if (!pkt->tprot_set) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) goto err; offset = nft_thoff(pkt); break; @@ -613,7 +613,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: - if (!pkt->tprot_set) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) goto err; offset = nft_thoff(pkt); break; From 5445819e76a6fbcb9a848efd5569ea27e547f6ab Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 28 Oct 2021 22:15:00 +0200 Subject: [PATCH 0322/1056] netfilter: nft_payload: support for inner header matching / mangling [ Upstream commit c46b38dc8743535e686b911d253a844f0bd50ead ] Allow to match and mangle on inner headers / payload data after the transport header. There is a new field in the pktinfo structure that stores the inner header offset which is calculated only when requested. Only TCP and UDP supported at this stage. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_tables.h | 2 + include/uapi/linux/netfilter/nf_tables.h | 2 + net/netfilter/nft_payload.c | 56 +++++++++++++++++++++++- 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d005f87691daa9..bcfee89012a187 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -23,6 +23,7 @@ struct module; enum { NFT_PKTINFO_L4PROTO = (1 << 0), + NFT_PKTINFO_INNER = (1 << 1), }; struct nft_pktinfo { @@ -32,6 +33,7 @@ struct nft_pktinfo { u8 tprot; u16 fragoff; unsigned int thoff; + unsigned int inneroff; }; static inline struct sock *nft_sk(const struct nft_pktinfo *pkt) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index e94d1fa554cb22..07871c8a060145 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -753,11 +753,13 @@ enum nft_dynset_attributes { * @NFT_PAYLOAD_LL_HEADER: link layer header * @NFT_PAYLOAD_NETWORK_HEADER: network header * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header + * @NFT_PAYLOAD_INNER_HEADER: inner header / payload */ enum nft_payload_bases { NFT_PAYLOAD_LL_HEADER, NFT_PAYLOAD_NETWORK_HEADER, NFT_PAYLOAD_TRANSPORT_HEADER, + NFT_PAYLOAD_INNER_HEADER, }; /** diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index c3ccfff54a3575..ee359a4a60f5bf 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -22,6 +22,7 @@ #include #include #include +#include #include static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off, @@ -79,6 +80,45 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0; } +static int __nft_payload_inner_offset(struct nft_pktinfo *pkt) +{ + unsigned int thoff = nft_thoff(pkt); + + if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + return -1; + + switch (pkt->tprot) { + case IPPROTO_UDP: + pkt->inneroff = thoff + sizeof(struct udphdr); + break; + case IPPROTO_TCP: { + struct tcphdr *th, _tcph; + + th = skb_header_pointer(pkt->skb, thoff, sizeof(_tcph), &_tcph); + if (!th) + return -1; + + pkt->inneroff = thoff + __tcp_hdrlen(th); + } + break; + default: + return -1; + } + + pkt->flags |= NFT_PKTINFO_INNER; + + return 0; +} + +static int nft_payload_inner_offset(const struct nft_pktinfo *pkt) +{ + if (!(pkt->flags & NFT_PKTINFO_INNER) && + __nft_payload_inner_offset((struct nft_pktinfo *)pkt) < 0) + return -1; + + return pkt->inneroff; +} + void nft_payload_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -112,6 +152,11 @@ void nft_payload_eval(const struct nft_expr *expr, goto err; offset = nft_thoff(pkt); break; + case NFT_PAYLOAD_INNER_HEADER: + offset = nft_payload_inner_offset(pkt); + if (offset < 0) + goto err; + break; default: BUG(); } @@ -617,6 +662,11 @@ static void nft_payload_set_eval(const struct nft_expr *expr, goto err; offset = nft_thoff(pkt); break; + case NFT_PAYLOAD_INNER_HEADER: + offset = nft_payload_inner_offset(pkt); + if (offset < 0) + goto err; + break; default: BUG(); } @@ -625,7 +675,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr, offset += priv->offset; if ((priv->csum_type == NFT_PAYLOAD_CSUM_INET || priv->csum_flags) && - (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER || + ((priv->base != NFT_PAYLOAD_TRANSPORT_HEADER && + priv->base != NFT_PAYLOAD_INNER_HEADER) || skb->ip_summed != CHECKSUM_PARTIAL)) { fsum = skb_checksum(skb, offset, priv->len, 0); tsum = csum_partial(src, priv->len, 0); @@ -744,6 +795,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, case NFT_PAYLOAD_LL_HEADER: case NFT_PAYLOAD_NETWORK_HEADER: case NFT_PAYLOAD_TRANSPORT_HEADER: + case NFT_PAYLOAD_INNER_HEADER: break; default: return ERR_PTR(-EOPNOTSUPP); @@ -762,7 +814,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) && - base != NFT_PAYLOAD_LL_HEADER) + base != NFT_PAYLOAD_LL_HEADER && base != NFT_PAYLOAD_INNER_HEADER) return &nft_payload_fast_ops; else return &nft_payload_ops; From aa1f19606558a26086d2dd71a8ffc1327ccd451e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 29 Jan 2022 17:13:23 +0100 Subject: [PATCH 0323/1056] netfilter: nft_payload: don't allow th access for fragments [ Upstream commit a9e8503def0fd4ed89ade1f61c315f904581d439 ] Loads relative to ->thoff naturally expect that this points to the transport header, but this is only true if pkt->fragoff == 0. This has little effect for rulesets with connection tracking/nat because these enable ip defra. For other rulesets this prevents false matches. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_exthdr.c | 2 +- net/netfilter/nft_payload.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index dbe1f2e7dd9ed6..9e927ab4df1510 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -167,7 +167,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt, { struct tcphdr *tcph; - if (pkt->tprot != IPPROTO_TCP) + if (pkt->tprot != IPPROTO_TCP || pkt->fragoff) return NULL; tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index ee359a4a60f5bf..b46e01365bd96d 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -84,7 +84,7 @@ static int __nft_payload_inner_offset(struct nft_pktinfo *pkt) { unsigned int thoff = nft_thoff(pkt); - if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) return -1; switch (pkt->tprot) { @@ -148,7 +148,7 @@ void nft_payload_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: - if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) goto err; offset = nft_thoff(pkt); break; @@ -658,7 +658,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: - if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) goto err; offset = nft_thoff(pkt); break; @@ -697,7 +697,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr, if (priv->csum_type == NFT_PAYLOAD_CSUM_SCTP && pkt->tprot == IPPROTO_SCTP && skb->ip_summed != CHECKSUM_PARTIAL) { - if (nft_payload_csum_sctp(skb, nft_thoff(pkt))) + if (pkt->fragoff == 0 && + nft_payload_csum_sctp(skb, nft_thoff(pkt))) goto err; } From 6ed826c949cf3cb38aba063a55be260d52c0ce55 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 27 Sep 2021 14:18:26 +0200 Subject: [PATCH 0324/1056] s390/boot: allocate amode31 section in decompressor [ Upstream commit e3ec8e0f5711d73f7e5d5c3cffdf4fad4f1487b9 ] The memory for amode31 section is allocated from the decompressed kernel. Instead, allocate that memory from the decompressor. This is a prerequisite to allow initialization of the virtual memory before the decompressed kernel takes over. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/boot/compressed/decompressor.h | 1 + arch/s390/boot/startup.c | 8 ++++++++ arch/s390/kernel/entry.h | 1 + arch/s390/kernel/setup.c | 22 +++++++++------------- arch/s390/kernel/vmlinux.lds.S | 1 + 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h index a59f75c5b04903..f75cc31a77dd9f 100644 --- a/arch/s390/boot/compressed/decompressor.h +++ b/arch/s390/boot/compressed/decompressor.h @@ -24,6 +24,7 @@ struct vmlinux_info { unsigned long dynsym_start; unsigned long rela_dyn_start; unsigned long rela_dyn_end; + unsigned long amode31_size; }; /* Symbols defined by linker scripts */ diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index b13352dd1e1cf4..1aa11a8f57dd82 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -15,6 +15,7 @@ #include "uv.h" unsigned long __bootdata_preserved(__kaslr_offset); +unsigned long __bootdata(__amode31_base); unsigned long __bootdata_preserved(VMALLOC_START); unsigned long __bootdata_preserved(VMALLOC_END); struct page *__bootdata_preserved(vmemmap); @@ -233,6 +234,12 @@ static void offset_vmlinux_info(unsigned long offset) vmlinux.dynsym_start += offset; } +static unsigned long reserve_amode31(unsigned long safe_addr) +{ + __amode31_base = PAGE_ALIGN(safe_addr); + return safe_addr + vmlinux.amode31_size; +} + void startup_kernel(void) { unsigned long random_lma; @@ -247,6 +254,7 @@ void startup_kernel(void) setup_lpp(); store_ipl_parmblock(); safe_addr = mem_safe_offset(); + safe_addr = reserve_amode31(safe_addr); safe_addr = read_ipl_report(safe_addr); uv_query_info(); rescue_initrd(safe_addr); diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 7f2696e8d511ed..6083090be1f46d 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -70,5 +70,6 @@ extern struct exception_table_entry _stop_amode31_ex_table[]; #define __amode31_data __section(".amode31.data") #define __amode31_ref __section(".amode31.refs") extern long _start_amode31_refs[], _end_amode31_refs[]; +extern unsigned long __amode31_base; #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 8ede12c4ba6b50..e38de9e8ee13d6 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -95,10 +95,10 @@ EXPORT_SYMBOL(console_irq); * relocated above 2 GB, because it has to use 31 bit addresses. * Such code and data is part of the .amode31 section. */ -unsigned long __amode31_ref __samode31 = __pa(&_samode31); -unsigned long __amode31_ref __eamode31 = __pa(&_eamode31); -unsigned long __amode31_ref __stext_amode31 = __pa(&_stext_amode31); -unsigned long __amode31_ref __etext_amode31 = __pa(&_etext_amode31); +unsigned long __amode31_ref __samode31 = (unsigned long)&_samode31; +unsigned long __amode31_ref __eamode31 = (unsigned long)&_eamode31; +unsigned long __amode31_ref __stext_amode31 = (unsigned long)&_stext_amode31; +unsigned long __amode31_ref __etext_amode31 = (unsigned long)&_etext_amode31; struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table; struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table; @@ -149,6 +149,7 @@ struct mem_detect_info __bootdata(mem_detect); struct initrd_data __bootdata(initrd_data); unsigned long __bootdata_preserved(__kaslr_offset); +unsigned long __bootdata(__amode31_base); unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); u64 __bootdata_preserved(stfle_fac_list[16]); @@ -800,6 +801,7 @@ static void __init reserve_kernel(void) memblock_reserve(0, STARTUP_NORMAL_OFFSET); memblock_reserve((unsigned long)sclp_early_sccb, EXT_SCCB_READ_SCP); + memblock_reserve(__amode31_base, __eamode31 - __samode31); memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) - (unsigned long)_stext); } @@ -820,20 +822,14 @@ static void __init setup_memory(void) static void __init relocate_amode31_section(void) { - unsigned long amode31_addr, amode31_size; - long amode31_offset; + unsigned long amode31_size = __eamode31 - __samode31; + long amode31_offset = __amode31_base - __samode31; long *ptr; - /* Allocate a new AMODE31 capable memory region */ - amode31_size = __eamode31 - __samode31; pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size); - amode31_addr = (unsigned long)memblock_alloc_low(amode31_size, PAGE_SIZE); - if (!amode31_addr) - panic("Failed to allocate memory for AMODE31 section\n"); - amode31_offset = amode31_addr - __samode31; /* Move original AMODE31 section to the new one */ - memmove((void *)amode31_addr, (void *)__samode31, amode31_size); + memmove((void *)__amode31_base, (void *)__samode31, amode31_size); /* Zero out the old AMODE31 section to catch invalid accesses within it */ memset((void *)__samode31, 0, amode31_size); diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 63bdb9e1bfc134..42c43521878ff7 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -212,6 +212,7 @@ SECTIONS QUAD(__dynsym_start) /* dynsym_start */ QUAD(__rela_dyn_start) /* rela_dyn_start */ QUAD(__rela_dyn_end) /* rela_dyn_end */ + QUAD(_eamode31 - _samode31) /* amode31_size */ } :NONE /* Debugging sections. */ From 06de5cf61538eca18aa2baccf65fa175e07c58d9 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 21 Jan 2021 13:06:02 +0100 Subject: [PATCH 0325/1056] s390/setup: use physical pointers for memblock_reserve() [ Upstream commit 04f11ed7d8e018e1f01ebda5814ddfeb3a1e6ae1 ] memblock_reserve() function accepts physcal address of a memory block to be reserved, but provided with virtual memory pointers. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/setup.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e38de9e8ee13d6..2ebde341d057ad 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -797,13 +797,10 @@ static void __init check_initrd(void) */ static void __init reserve_kernel(void) { - unsigned long start_pfn = PFN_UP(__pa(_end)); - memblock_reserve(0, STARTUP_NORMAL_OFFSET); - memblock_reserve((unsigned long)sclp_early_sccb, EXT_SCCB_READ_SCP); memblock_reserve(__amode31_base, __eamode31 - __samode31); - memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) - - (unsigned long)_stext); + memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP); + memblock_reserve(__pa(_stext), _end - _stext); } static void __init setup_memory(void) From a29c71f3a4b17ce49feacc6e610e953ef70d7dfc Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Wed, 9 Feb 2022 11:25:09 +0100 Subject: [PATCH 0326/1056] s390/setup: preserve memory at OLDMEM_BASE and OLDMEM_SIZE [ Upstream commit 6b4b54c7ca347bcb4aa7a3cc01aa16e84ac7fbe4 ] We need to preserve the values at OLDMEM_BASE and OLDMEM_SIZE which are used by zgetdump in case when kdump crashes. In that case zgetdump will attempt to read OLDMEM_BASE and OLDMEM_SIZE in order to find out where the memory range [0 - OLDMEM_SIZE] belonging to the production kernel is. Fixes: f1a546947431 ("s390/setup: don't reserve memory that occupied decompressor's head") Cc: stable@vger.kernel.org # 5.15+ Signed-off-by: Alexander Egorenkov Acked-by: Vasily Gorbik Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 2ebde341d057ad..36c1f31dfd66f7 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -798,6 +798,8 @@ static void __init check_initrd(void) static void __init reserve_kernel(void) { memblock_reserve(0, STARTUP_NORMAL_OFFSET); + memblock_reserve(OLDMEM_BASE, sizeof(unsigned long)); + memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long)); memblock_reserve(__amode31_base, __eamode31 - __samode31); memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP); memblock_reserve(__pa(_stext), _end - _stext); From 4fe1439ef2e0ff762baf9d5af547cdc1c76bcdea Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 24 Feb 2022 22:23:56 -0800 Subject: [PATCH 0327/1056] ibmvnic: init init_done_rc earlier [ Upstream commit ae16bf15374d8b055e040ac6f3f1147ab1c9bb7d ] We currently initialize the ->init_done completion/return code fields before issuing a CRQ_INIT command. But if we get a transport event soon after registering the CRQ the taskslet may already have recorded the completion and error code. If we initialize here, we might overwrite/ lose that and end up issuing the CRQ_INIT only to timeout later. If that timeout happens during probe, we will leave the adapter in the DOWN state rather than retrying to register/init the CRQ. Initialize the completion before registering the CRQ so we don't lose the notification. Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol") Signed-off-by: Sukadev Bhattiprolu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index b262aa84b6a245..70267bd73429fc 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2063,6 +2063,19 @@ static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason) return "UNKNOWN"; } +/* + * Initialize the init_done completion and return code values. We + * can get a transport event just after registering the CRQ and the + * tasklet will use this to communicate the transport event. To ensure + * we don't miss the notification/error, initialize these _before_ + * regisering the CRQ. + */ +static inline void reinit_init_done(struct ibmvnic_adapter *adapter) +{ + reinit_completion(&adapter->init_done); + adapter->init_done_rc = 0; +} + /* * do_reset returns zero if we are able to keep processing reset events, or * non-zero if we hit a fatal error and must halt. @@ -2169,6 +2182,8 @@ static int do_reset(struct ibmvnic_adapter *adapter, */ adapter->state = VNIC_PROBED; + reinit_init_done(adapter); + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { rc = init_crq_queue(adapter); } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) { @@ -2314,7 +2329,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, */ adapter->state = VNIC_PROBED; - reinit_completion(&adapter->init_done); + reinit_init_done(adapter); + rc = init_crq_queue(adapter); if (rc) { netdev_err(adapter->netdev, @@ -5485,10 +5501,6 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) adapter->from_passive_init = false; - if (reset) - reinit_completion(&adapter->init_done); - - adapter->init_done_rc = 0; rc = ibmvnic_send_crq_init(adapter); if (rc) { dev_err(dev, "Send crq init failed with error %d\n", rc); @@ -5502,12 +5514,14 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) if (adapter->init_done_rc) { release_crq_queue(adapter); + dev_err(dev, "CRQ-init failed, %d\n", adapter->init_done_rc); return adapter->init_done_rc; } if (adapter->from_passive_init) { adapter->state = VNIC_OPEN; adapter->from_passive_init = false; + dev_err(dev, "CRQ-init failed, passive-init\n"); return -1; } @@ -5596,6 +5610,8 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) init_success = false; do { + reinit_init_done(adapter); + rc = init_crq_queue(adapter); if (rc) { dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n", From 85996ef1795206c82f7c66f38cce36215d8f860f Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 24 Feb 2022 22:23:57 -0800 Subject: [PATCH 0328/1056] ibmvnic: clear fop when retrying probe [ Upstream commit f628ad531b4f34fdba0984255b4a2850dd369513 ] Clear ->failover_pending flag that may have been set in the previous pass of registering CRQ. If we don't clear, a subsequent ibmvnic_open() call would be misled into thinking a failover is pending and assuming that the reset worker thread would open the adapter. If this pass of registering the CRQ succeeds (i.e there is no transport event), there wouldn't be a reset worker thread. This would leave the adapter unconfigured and require manual intervention to bring it up during boot. Fixes: 5a18e1e0c193 ("ibmvnic: Fix failover case for non-redundant configuration") Signed-off-by: Sukadev Bhattiprolu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 70267bd73429fc..9f4f40564d74e1 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -5612,6 +5612,11 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) do { reinit_init_done(adapter); + /* clear any failovers we got in the previous pass + * since we are reinitializing the CRQ + */ + adapter->failover_pending = false; + rc = init_crq_queue(adapter); if (rc) { dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n", From 32ac44b70e171e3478549758a6b3c5f66e2ae474 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 24 Feb 2022 22:23:58 -0800 Subject: [PATCH 0329/1056] ibmvnic: Allow queueing resets during probe [ Upstream commit fd98693cb0721317f27341951593712c580c36a1 ] We currently don't allow queuing resets when adapter is in VNIC_PROBING state - instead we throw away the reset and return EBUSY. The reasoning is probably that during ibmvnic_probe() the ibmvnic_adapter itself is being initialized so performing a reset during this time can lead us to accessing fields in the ibmvnic_adapter that are not fully initialized. A review of the code shows that all the adapter state neede to process a reset is initialized before registering the CRQ so that should no longer be a concern. Further the expectation is that if we do get a reset (transport event) during probe, the do..while() loop in ibmvnic_probe() will handle this by reinitializing the CRQ. While that is true to some extent, it is possible that the reset might occur _after_ the CRQ is registered and CRQ_INIT message was exchanged but _before_ the adapter state is set to VNIC_PROBED. As mentioned above, such a reset will be thrown away. While the client assumes that the adapter is functional, the vnic server will wait for the client to reinit the adapter. This disconnect between the two leaves the adapter down needing manual intervention. Because ibmvnic_probe() has other work to do after initializing the CRQ (such as registering the netdev at a minimum) and because the reset event can occur at any instant after the CRQ is initialized, there will always be a window between initializing the CRQ and considering the adapter ready for resets (ie state == PROBED). So rather than discarding resets during this window, allow queueing them - but only process them after the adapter is fully initialized. To do this, introduce a new completion state ->probe_done and have the reset worker thread wait on this before processing resets. This change brings up two new situations in or just after ibmvnic_probe(). First after one or more resets were queued, we encounter an error and decide to retry the initialization. At that point the queued resets are no longer relevant since we could be talking to a new vnic server. So we must purge/flush the queued resets before restarting the initialization. As a side note, since we are still in the probing stage and we have not registered the netdev, it will not be CHANGE_PARAM reset. Second this change opens up a potential race between the worker thread in __ibmvnic_reset(), the tasklet and the ibmvnic_open() due to the following sequence of events: 1. Register CRQ 2. Get transport event before CRQ_INIT completes. 3. Tasklet schedules reset: a) add rwi to list b) schedule_work() to start worker thread which runs and waits for ->probe_done. 4. ibmvnic_probe() decides to retry, purges rwi_list 5. Re-register crq and this time rest of probe succeeds - register netdev and complete(->probe_done). 6. Worker thread resumes in __ibmvnic_reset() from 3b. 7. Worker thread sets ->resetting bit 8. ibmvnic_open() comes in, notices ->resetting bit, sets state to IBMVNIC_OPEN and returns early expecting worker thread to finish the open. 9. Worker thread finds rwi_list empty and returns without opening the interface. If this happens, the ->ndo_open() call is effectively lost and the interface remains down. To address this, ensure that ->rwi_list is not empty before setting the ->resetting bit. See also comments in __ibmvnic_reset(). Fixes: 6a2fb0e99f9c ("ibmvnic: driver initialization for kdump/kexec") Signed-off-by: Sukadev Bhattiprolu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 107 ++++++++++++++++++++++++++--- drivers/net/ethernet/ibm/ibmvnic.h | 1 + 2 files changed, 98 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 9f4f40564d74e1..28344c3dfea1b2 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2471,23 +2471,82 @@ static int do_passive_init(struct ibmvnic_adapter *adapter) static void __ibmvnic_reset(struct work_struct *work) { struct ibmvnic_adapter *adapter; - bool saved_state = false; + unsigned int timeout = 5000; struct ibmvnic_rwi *tmprwi; + bool saved_state = false; struct ibmvnic_rwi *rwi; unsigned long flags; - u32 reset_state; + struct device *dev; + bool need_reset; int num_fails = 0; + u32 reset_state; int rc = 0; adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset); + dev = &adapter->vdev->dev; - if (test_and_set_bit_lock(0, &adapter->resetting)) { + /* Wait for ibmvnic_probe() to complete. If probe is taking too long + * or if another reset is in progress, defer work for now. If probe + * eventually fails it will flush and terminate our work. + * + * Three possibilities here: + * 1. Adpater being removed - just return + * 2. Timed out on probe or another reset in progress - delay the work + * 3. Completed probe - perform any resets in queue + */ + if (adapter->state == VNIC_PROBING && + !wait_for_completion_timeout(&adapter->probe_done, timeout)) { + dev_err(dev, "Reset thread timed out on probe"); queue_delayed_work(system_long_wq, &adapter->ibmvnic_delayed_reset, IBMVNIC_RESET_DELAY); return; } + /* adapter is done with probe (i.e state is never VNIC_PROBING now) */ + if (adapter->state == VNIC_REMOVING) + return; + + /* ->rwi_list is stable now (no one else is removing entries) */ + + /* ibmvnic_probe() may have purged the reset queue after we were + * scheduled to process a reset so there maybe no resets to process. + * Before setting the ->resetting bit though, we have to make sure + * that there is infact a reset to process. Otherwise we may race + * with ibmvnic_open() and end up leaving the vnic down: + * + * __ibmvnic_reset() ibmvnic_open() + * ----------------- -------------- + * + * set ->resetting bit + * find ->resetting bit is set + * set ->state to IBMVNIC_OPEN (i.e + * assume reset will open device) + * return + * find reset queue empty + * return + * + * Neither performed vnic login/open and vnic stays down + * + * If we hold the lock and conditionally set the bit, either we + * or ibmvnic_open() will complete the open. + */ + need_reset = false; + spin_lock(&adapter->rwi_lock); + if (!list_empty(&adapter->rwi_list)) { + if (test_and_set_bit_lock(0, &adapter->resetting)) { + queue_delayed_work(system_long_wq, + &adapter->ibmvnic_delayed_reset, + IBMVNIC_RESET_DELAY); + } else { + need_reset = true; + } + } + spin_unlock(&adapter->rwi_lock); + + if (!need_reset) + return; + rwi = get_next_rwi(adapter); while (rwi) { spin_lock_irqsave(&adapter->state_lock, flags); @@ -2639,13 +2698,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, goto err; } - if (adapter->state == VNIC_PROBING) { - netdev_warn(netdev, "Adapter reset during probe\n"); - adapter->init_done_rc = -EAGAIN; - ret = EAGAIN; - goto err; - } - list_for_each_entry(tmp, &adapter->rwi_list, list) { if (tmp->reset_reason == reason) { netdev_dbg(netdev, "Skipping matching reset, reason=%s\n", @@ -5561,6 +5613,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) struct ibmvnic_adapter *adapter; struct net_device *netdev; unsigned char *mac_addr_p; + unsigned long flags; bool init_success; int rc; @@ -5602,6 +5655,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) spin_lock_init(&adapter->rwi_lock); spin_lock_init(&adapter->state_lock); mutex_init(&adapter->fw_lock); + init_completion(&adapter->probe_done); init_completion(&adapter->init_done); init_completion(&adapter->fw_done); init_completion(&adapter->reset_done); @@ -5617,6 +5671,26 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) */ adapter->failover_pending = false; + /* If we had already initialized CRQ, we may have one or + * more resets queued already. Discard those and release + * the CRQ before initializing the CRQ again. + */ + release_crq_queue(adapter); + + /* Since we are still in PROBING state, __ibmvnic_reset() + * will not access the ->rwi_list and since we released CRQ, + * we won't get _new_ transport events. But there maybe an + * ongoing ibmvnic_reset() call. So serialize access to + * rwi_list. If we win the race, ibvmnic_reset() could add + * a reset after we purged but thats ok - we just may end + * up with an extra reset (i.e similar to having two or more + * resets in the queue at once). + * CHECK. + */ + spin_lock_irqsave(&adapter->rwi_lock, flags); + flush_reset_queue(adapter); + spin_unlock_irqrestore(&adapter->rwi_lock, flags); + rc = init_crq_queue(adapter); if (rc) { dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n", @@ -5668,6 +5742,8 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) } dev_info(&dev->dev, "ibmvnic registered\n"); + complete(&adapter->probe_done); + return 0; ibmvnic_register_fail: @@ -5682,6 +5758,17 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) ibmvnic_init_fail: release_sub_crqs(adapter, 1); release_crq_queue(adapter); + + /* cleanup worker thread after releasing CRQ so we don't get + * transport events (i.e new work items for the worker thread). + */ + adapter->state = VNIC_REMOVING; + complete(&adapter->probe_done); + flush_work(&adapter->ibmvnic_reset); + flush_delayed_work(&adapter->ibmvnic_delayed_reset); + + flush_reset_queue(adapter); + mutex_destroy(&adapter->fw_lock); free_netdev(netdev); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 1a9ed9202654fc..b01c439965ff91 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -927,6 +927,7 @@ struct ibmvnic_adapter { struct ibmvnic_tx_pool *tx_pool; struct ibmvnic_tx_pool *tso_pool; + struct completion probe_done; struct completion init_done; int init_done_rc; From 5c82c94b0be70f1c56c8ef5c6cdd73d711e98066 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 1 Sep 2021 16:14:34 +0300 Subject: [PATCH 0330/1056] virtio-blk: avoid preallocating big SGL for data [ Upstream commit 02746e26c39ee473b975e0f68d1295abc92672ed ] No need to pre-allocate a big buffer for the IO SGL anymore. If a device has lots of deep queues, preallocation for the sg list can consume substantial amounts of memory. For HW virtio-blk device, nr_hw_queues can be 64 or 128 and each queue's depth might be 128. This means the resulting preallocation for the data SGLs is big. Switch to runtime allocation for SGL for lists longer than 2 entries. This is the approach used by NVMe drivers so it should be reasonable for virtio block as well. Runtime SGL allocation has always been the case for the legacy I/O path so this is nothing new. The preallocated small SGL depends on SG_CHAIN so if the ARCH doesn't support SG_CHAIN, use only runtime allocation for the SGL. Re-organize the setup of the IO request to fit the new sg chain mechanism. No performance degradation was seen (fio libaio engine with 16 jobs and 128 iodepth): IO size IOPs Rand Read (before/after) IOPs Rand Write (before/after) -------- --------------------------------- ---------------------------------- 512B 318K/316K 329K/325K 4KB 323K/321K 353K/349K 16KB 199K/208K 250K/275K 128KB 36K/36.1K 39.2K/41.7K Signed-off-by: Max Gurtovoy Reviewed-by: Israel Rukshin Link: https://lore.kernel.org/r/20210901131434.31158-1-mgurtovoy@nvidia.com Reviewed-by: Feng Li Reviewed-by: Stefan Hajnoczi Tested-by: Stefan Hajnoczi Reviewed-by: Christoph Hellwig Signed-off-by: Arnd Bergmann # kconfig fixups Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/block/Kconfig | 1 + drivers/block/virtio_blk.c | 156 ++++++++++++++++++++++++------------- 2 files changed, 101 insertions(+), 56 deletions(-) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index f93cb989241cc6..28ed157b1203b8 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -410,6 +410,7 @@ config XEN_BLKDEV_BACKEND config VIRTIO_BLK tristate "Virtio block driver" depends on VIRTIO + select SG_POOL help This is the virtual block driver for virtio. It can be used with QEMU based VMMs (like KVM or Xen). Say Y or M. diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index c05138a284755a..ccc5770d7654ed 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -24,6 +24,12 @@ /* The maximum number of sg elements that fit into a virtqueue */ #define VIRTIO_BLK_MAX_SG_ELEMS 32768 +#ifdef CONFIG_ARCH_NO_SG_CHAIN +#define VIRTIO_BLK_INLINE_SG_CNT 0 +#else +#define VIRTIO_BLK_INLINE_SG_CNT 2 +#endif + static int major; static DEFINE_IDA(vd_index_ida); @@ -77,6 +83,7 @@ struct virtio_blk { struct virtblk_req { struct virtio_blk_outhdr out_hdr; u8 status; + struct sg_table sg_table; struct scatterlist sg[]; }; @@ -162,12 +169,92 @@ static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap) return 0; } -static inline void virtblk_request_done(struct request *req) +static void virtblk_unmap_data(struct request *req, struct virtblk_req *vbr) { - struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + if (blk_rq_nr_phys_segments(req)) + sg_free_table_chained(&vbr->sg_table, + VIRTIO_BLK_INLINE_SG_CNT); +} + +static int virtblk_map_data(struct blk_mq_hw_ctx *hctx, struct request *req, + struct virtblk_req *vbr) +{ + int err; + + if (!blk_rq_nr_phys_segments(req)) + return 0; + + vbr->sg_table.sgl = vbr->sg; + err = sg_alloc_table_chained(&vbr->sg_table, + blk_rq_nr_phys_segments(req), + vbr->sg_table.sgl, + VIRTIO_BLK_INLINE_SG_CNT); + if (unlikely(err)) + return -ENOMEM; + return blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl); +} + +static void virtblk_cleanup_cmd(struct request *req) +{ if (req->rq_flags & RQF_SPECIAL_PAYLOAD) kfree(bvec_virt(&req->special_vec)); +} + +static int virtblk_setup_cmd(struct virtio_device *vdev, struct request *req, + struct virtblk_req *vbr) +{ + bool unmap = false; + u32 type; + + vbr->out_hdr.sector = 0; + + switch (req_op(req)) { + case REQ_OP_READ: + type = VIRTIO_BLK_T_IN; + vbr->out_hdr.sector = cpu_to_virtio64(vdev, + blk_rq_pos(req)); + break; + case REQ_OP_WRITE: + type = VIRTIO_BLK_T_OUT; + vbr->out_hdr.sector = cpu_to_virtio64(vdev, + blk_rq_pos(req)); + break; + case REQ_OP_FLUSH: + type = VIRTIO_BLK_T_FLUSH; + break; + case REQ_OP_DISCARD: + type = VIRTIO_BLK_T_DISCARD; + break; + case REQ_OP_WRITE_ZEROES: + type = VIRTIO_BLK_T_WRITE_ZEROES; + unmap = !(req->cmd_flags & REQ_NOUNMAP); + break; + case REQ_OP_DRV_IN: + type = VIRTIO_BLK_T_GET_ID; + break; + default: + WARN_ON_ONCE(1); + return BLK_STS_IOERR; + } + + vbr->out_hdr.type = cpu_to_virtio32(vdev, type); + vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); + + if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { + if (virtblk_setup_discard_write_zeroes(req, unmap)) + return BLK_STS_RESOURCE; + } + + return 0; +} + +static inline void virtblk_request_done(struct request *req) +{ + struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + + virtblk_unmap_data(req, vbr); + virtblk_cleanup_cmd(req); blk_mq_end_request(req, virtblk_result(vbr)); } @@ -225,57 +312,23 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, int qid = hctx->queue_num; int err; bool notify = false; - bool unmap = false; - u32 type; BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); - switch (req_op(req)) { - case REQ_OP_READ: - case REQ_OP_WRITE: - type = 0; - break; - case REQ_OP_FLUSH: - type = VIRTIO_BLK_T_FLUSH; - break; - case REQ_OP_DISCARD: - type = VIRTIO_BLK_T_DISCARD; - break; - case REQ_OP_WRITE_ZEROES: - type = VIRTIO_BLK_T_WRITE_ZEROES; - unmap = !(req->cmd_flags & REQ_NOUNMAP); - break; - case REQ_OP_DRV_IN: - type = VIRTIO_BLK_T_GET_ID; - break; - default: - WARN_ON_ONCE(1); - return BLK_STS_IOERR; - } - - vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type); - vbr->out_hdr.sector = type ? - 0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req)); - vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req)); + err = virtblk_setup_cmd(vblk->vdev, req, vbr); + if (unlikely(err)) + return err; blk_mq_start_request(req); - if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { - err = virtblk_setup_discard_write_zeroes(req, unmap); - if (err) - return BLK_STS_RESOURCE; - } - - num = blk_rq_map_sg(hctx->queue, req, vbr->sg); - if (num) { - if (rq_data_dir(req) == WRITE) - vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT); - else - vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN); + num = virtblk_map_data(hctx, req, vbr); + if (unlikely(num < 0)) { + virtblk_cleanup_cmd(req); + return BLK_STS_RESOURCE; } spin_lock_irqsave(&vblk->vqs[qid].lock, flags); - err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); + err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg_table.sgl, num); if (err) { virtqueue_kick(vblk->vqs[qid].vq); /* Don't stop the queue if -ENOMEM: we may have failed to @@ -284,6 +337,8 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, if (err == -ENOSPC) blk_mq_stop_hw_queue(hctx); spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); + virtblk_unmap_data(req, vbr); + virtblk_cleanup_cmd(req); switch (err) { case -ENOSPC: return BLK_STS_DEV_RESOURCE; @@ -660,16 +715,6 @@ static const struct attribute_group *virtblk_attr_groups[] = { NULL, }; -static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, - unsigned int hctx_idx, unsigned int numa_node) -{ - struct virtio_blk *vblk = set->driver_data; - struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); - - sg_init_table(vbr->sg, vblk->sg_elems); - return 0; -} - static int virtblk_map_queues(struct blk_mq_tag_set *set) { struct virtio_blk *vblk = set->driver_data; @@ -682,7 +727,6 @@ static const struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, .commit_rqs = virtio_commit_rqs, .complete = virtblk_request_done, - .init_request = virtblk_init_request, .map_queues = virtblk_map_queues, }; @@ -762,7 +806,7 @@ static int virtblk_probe(struct virtio_device *vdev) vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; vblk->tag_set.cmd_size = sizeof(struct virtblk_req) + - sizeof(struct scatterlist) * sg_elems; + sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT; vblk->tag_set.driver_data = vblk; vblk->tag_set.nr_hw_queues = vblk->num_vqs; From df1ec53252d5b5b26ea49e30438741c9a6d89857 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 20 Mar 2022 13:08:38 -0600 Subject: [PATCH 0331/1056] io_uring: ensure that fsnotify is always called [ Upstream commit f63cf5192fe3418ad5ae1a4412eba5694b145f79 ] Ensure that we call fsnotify_modify() if we write a file, and that we do fsnotify_access() if we read it. This enables anyone using inotify on the file to get notified. Ditto for fallocate, ensure that fsnotify_modify() is called. Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- fs/io_uring.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 1893237403247d..0c5dcda0b62215 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2686,8 +2686,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req) static bool __io_complete_rw_common(struct io_kiocb *req, long res) { - if (req->rw.kiocb.ki_flags & IOCB_WRITE) + if (req->rw.kiocb.ki_flags & IOCB_WRITE) { kiocb_end_write(req); + fsnotify_modify(req->file); + } else { + fsnotify_access(req->file); + } if (res != req->result) { if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_should_reissue(req)) { @@ -4183,6 +4187,8 @@ static int io_fallocate(struct io_kiocb *req, unsigned int issue_flags) req->sync.len); if (ret < 0) req_set_fail(req); + else + fsnotify_modify(req->file); io_req_complete(req, ret); return 0; } From eb79d1353cd0f2c04e1fc6311d7ce1d40a69b790 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 14 Oct 2021 15:03:28 +0100 Subject: [PATCH 0332/1056] block: use bdev_get_queue() in bio.c [ Upstream commit 3caee4634be68e755d2fb130962f1623661dbd5b ] Convert bdev->bd_disk->queue to bdev_get_queue(), it's uses a cached queue pointer and so is faster. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/85c36ea784d285a5075baa10049e6b59e15fb484.1634219547.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/block/bio.c b/block/bio.c index 8381c6690dd6c5..365bb63626696c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -910,7 +910,7 @@ EXPORT_SYMBOL(bio_add_pc_page); int bio_add_zone_append_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) { - struct request_queue *q = bio->bi_bdev->bd_disk->queue; + struct request_queue *q = bdev_get_queue(bio->bi_bdev); bool same_page = false; if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND)) @@ -1054,7 +1054,7 @@ static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter) static int bio_iov_bvec_set_append(struct bio *bio, struct iov_iter *iter) { - struct request_queue *q = bio->bi_bdev->bd_disk->queue; + struct request_queue *q = bdev_get_queue(bio->bi_bdev); struct iov_iter i = *iter; iov_iter_truncate(&i, queue_max_zone_append_sectors(q) << 9); @@ -1132,7 +1132,7 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter) { unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; - struct request_queue *q = bio->bi_bdev->bd_disk->queue; + struct request_queue *q = bdev_get_queue(bio->bi_bdev); unsigned int max_append_sectors = queue_max_zone_append_sectors(q); struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; struct page **pages = (struct page **)bv; @@ -1471,10 +1471,10 @@ void bio_endio(struct bio *bio) return; if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED)) - rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio); + rq_qos_done_bio(bdev_get_queue(bio->bi_bdev), bio); if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { - trace_block_bio_complete(bio->bi_bdev->bd_disk->queue, bio); + trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio); bio_clear_flag(bio, BIO_TRACE_COMPLETION); } From 13141cceadd07252880caea92b4617eed19c11cc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Oct 2021 20:06:18 -0600 Subject: [PATCH 0333/1056] block: only mark bio as tracked if it really is tracked [ Upstream commit 90b8faa0e8de1b02b619fb33f6c6e1e13e7d1d70 ] We set BIO_TRACKED unconditionally when rq_qos_throttle() is called, even though we may not even have an rq_qos handler. Only mark it as TRACKED if it really is potentially tracked. This saves considerable time for the case where the bio isn't tracked: 2.64% -1.65% [kernel.vmlinux] [k] bio_endio Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-rq-qos.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index f000f83e0621c8..3cfbc8668cba98 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -189,9 +189,10 @@ static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) * BIO_TRACKED lets controllers know that a bio went through the * normal rq_qos path. */ - bio_set_flag(bio, BIO_TRACKED); - if (q->rq_qos) + if (q->rq_qos) { + bio_set_flag(bio, BIO_TRACKED); __rq_qos_throttle(q->rq_qos, bio); + } } static inline void rq_qos_track(struct request_queue *q, struct request *rq, From af9452dfdba4bf7359ef7645eee2d243a1df0649 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 13 Mar 2022 21:15:02 -1000 Subject: [PATCH 0334/1056] block: fix rq-qos breakage from skipping rq_qos_done_bio() [ Upstream commit aa1b46dcdc7baaf5fec0be25782ef24b26aa209e ] a647a524a467 ("block: don't call rq_qos_ops->done_bio if the bio isn't tracked") made bio_endio() skip rq_qos_done_bio() if BIO_TRACKED is not set. While this fixed a potential oops, it also broke blk-iocost by skipping the done_bio callback for merged bios. Before, whether a bio goes through rq_qos_throttle() or rq_qos_merge(), rq_qos_done_bio() would be called on the bio on completion with BIO_TRACKED distinguishing the former from the latter. rq_qos_done_bio() is not called for bios which wenth through rq_qos_merge(). This royally confuses blk-iocost as the merged bios never finish and are considered perpetually in-flight. One reliably reproducible failure mode is an intermediate cgroup geting stuck active preventing its children from being activated due to the leaf-only rule, leading to loss of control. The following is from resctl-bench protection scenario which emulates isolating a web server like workload from a memory bomb run on an iocost configuration which should yield a reasonable level of protection. # cat /sys/block/nvme2n1/device/model Samsung SSD 970 PRO 512GB # cat /sys/fs/cgroup/io.cost.model 259:0 ctrl=user model=linear rbps=834913556 rseqiops=93622 rrandiops=102913 wbps=618985353 wseqiops=72325 wrandiops=71025 # cat /sys/fs/cgroup/io.cost.qos 259:0 enable=1 ctrl=user rpct=95.00 rlat=18776 wpct=95.00 wlat=8897 min=60.00 max=100.00 # resctl-bench -m 29.6G -r out.json run protection::scenario=mem-hog,loops=1 ... Memory Hog Summary ================== IO Latency: R p50=242u:336u/2.5m p90=794u:1.4m/7.5m p99=2.7m:8.0m/62.5m max=8.0m:36.4m/350m W p50=221u:323u/1.5m p90=709u:1.2m/5.5m p99=1.5m:2.5m/9.5m max=6.9m:35.9m/350m Isolation and Request Latency Impact Distributions: min p01 p05 p10 p25 p50 p75 p90 p95 p99 max mean stdev isol% 15.90 15.90 15.90 40.05 57.24 59.07 60.01 74.63 74.63 90.35 90.35 58.12 15.82 lat-imp% 0 0 0 0 0 4.55 14.68 15.54 233.5 548.1 548.1 53.88 143.6 Result: isol=58.12:15.82% lat_imp=53.88%:143.6 work_csv=100.0% missing=3.96% The isolation result of 58.12% is close to what this device would show without any IO control. Fix it by introducing a new flag BIO_QOS_MERGED to mark merged bios and calling rq_qos_done_bio() on them too. For consistency and clarity, rename BIO_TRACKED to BIO_QOS_THROTTLED. The flag checks are moved into rq_qos_done_bio() so that it's next to the code paths that set the flags. With the patch applied, the above same benchmark shows: # resctl-bench -m 29.6G -r out.json run protection::scenario=mem-hog,loops=1 ... Memory Hog Summary ================== IO Latency: R p50=123u:84.4u/985u p90=322u:256u/2.5m p99=1.6m:1.4m/9.5m max=11.1m:36.0m/350m W p50=429u:274u/995u p90=1.7m:1.3m/4.5m p99=3.4m:2.7m/11.5m max=7.9m:5.9m/26.5m Isolation and Request Latency Impact Distributions: min p01 p05 p10 p25 p50 p75 p90 p95 p99 max mean stdev isol% 84.91 84.91 89.51 90.73 92.31 94.49 96.36 98.04 98.71 100.0 100.0 94.42 2.81 lat-imp% 0 0 0 0 0 2.81 5.73 11.11 13.92 17.53 22.61 4.10 4.68 Result: isol=94.42:2.81% lat_imp=4.10%:4.68 work_csv=58.34% missing=0% Signed-off-by: Tejun Heo Fixes: a647a524a467 ("block: don't call rq_qos_ops->done_bio if the bio isn't tracked") Cc: stable@vger.kernel.org # v5.15+ Cc: Ming Lei Cc: Yu Kuai Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/Yi7rdrzQEHjJLGKB@slm.duckdns.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bio.c | 3 +-- block/blk-iolatency.c | 2 +- block/blk-rq-qos.h | 20 +++++++++++--------- include/linux/blk_types.h | 3 ++- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/block/bio.c b/block/bio.c index 365bb63626696c..b8a8bfba714f47 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1470,8 +1470,7 @@ void bio_endio(struct bio *bio) if (!bio_integrity_endio(bio)) return; - if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED)) - rq_qos_done_bio(bdev_get_queue(bio->bi_bdev), bio); + rq_qos_done_bio(bio); if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio); diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index ce3847499d85b8..d85f30a85ee754 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -601,7 +601,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) int inflight = 0; blkg = bio->bi_blkg; - if (!blkg || !bio_flagged(bio, BIO_TRACKED)) + if (!blkg || !bio_flagged(bio, BIO_QOS_THROTTLED)) return; iolat = blkg_to_lat(bio->bi_blkg); diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 3cfbc8668cba98..68267007da1c67 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -177,20 +177,20 @@ static inline void rq_qos_requeue(struct request_queue *q, struct request *rq) __rq_qos_requeue(q->rq_qos, rq); } -static inline void rq_qos_done_bio(struct request_queue *q, struct bio *bio) +static inline void rq_qos_done_bio(struct bio *bio) { - if (q->rq_qos) - __rq_qos_done_bio(q->rq_qos, bio); + if (bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) || + bio_flagged(bio, BIO_QOS_MERGED))) { + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + if (q->rq_qos) + __rq_qos_done_bio(q->rq_qos, bio); + } } static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) { - /* - * BIO_TRACKED lets controllers know that a bio went through the - * normal rq_qos path. - */ if (q->rq_qos) { - bio_set_flag(bio, BIO_TRACKED); + bio_set_flag(bio, BIO_QOS_THROTTLED); __rq_qos_throttle(q->rq_qos, bio); } } @@ -205,8 +205,10 @@ static inline void rq_qos_track(struct request_queue *q, struct request *rq, static inline void rq_qos_merge(struct request_queue *q, struct request *rq, struct bio *bio) { - if (q->rq_qos) + if (q->rq_qos) { + bio_set_flag(bio, BIO_QOS_MERGED); __rq_qos_merge(q->rq_qos, rq, bio); + } } static inline void rq_qos_queue_depth_changed(struct request_queue *q) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 17c92c0f15b20d..36ce3d0fb9f3b0 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -294,7 +294,8 @@ enum { BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion * of this bio. */ BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ - BIO_TRACKED, /* set if bio goes through the rq_qos path */ + BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */ + BIO_QOS_MERGED, /* but went through rq_qos merge path */ BIO_REMAPPED, BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */ BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */ From d57ab893cdf8046cbe4d49746f9418020f788b1f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 17 May 2021 20:01:15 -0700 Subject: [PATCH 0335/1056] stddef: Introduce struct_group() helper macro [ Upstream commit 50d7bd38c3aafc4749e05e8d7fcb616979143602 ] Kernel code has a regular need to describe groups of members within a structure usually when they need to be copied or initialized separately from the rest of the surrounding structure. The generally accepted design pattern in C is to use a named sub-struct: struct foo { int one; struct { int two; int three, four; } thing; int five; }; This would allow for traditional references and sizing: memcpy(&dst.thing, &src.thing, sizeof(dst.thing)); However, doing this would mean that referencing struct members enclosed by such named structs would always require including the sub-struct name in identifiers: do_something(dst.thing.three); This has tended to be quite inflexible, especially when such groupings need to be added to established code which causes huge naming churn. Three workarounds exist in the kernel for this problem, and each have other negative properties. To avoid the naming churn, there is a design pattern of adding macro aliases for the named struct: #define f_three thing.three This ends up polluting the global namespace, and makes it difficult to search for identifiers. Another common work-around in kernel code avoids the pollution by avoiding the named struct entirely, instead identifying the group's boundaries using either a pair of empty anonymous structs of a pair of zero-element arrays: struct foo { int one; struct { } start; int two; int three, four; struct { } finish; int five; }; struct foo { int one; int start[0]; int two; int three, four; int finish[0]; int five; }; This allows code to avoid needing to use a sub-struct named for member references within the surrounding structure, but loses the benefits of being able to actually use such a struct, making it rather fragile. Using these requires open-coded calculation of sizes and offsets. The efforts made to avoid common mistakes include lots of comments, or adding various BUILD_BUG_ON()s. Such code is left with no way for the compiler to reason about the boundaries (e.g. the "start" object looks like it's 0 bytes in length), making bounds checking depend on open-coded calculations: if (length > offsetof(struct foo, finish) - offsetof(struct foo, start)) return -EINVAL; memcpy(&dst.start, &src.start, offsetof(struct foo, finish) - offsetof(struct foo, start)); However, the vast majority of places in the kernel that operate on groups of members do so without any identification of the grouping, relying either on comments or implicit knowledge of the struct contents, which is even harder for the compiler to reason about, and results in even more fragile manual sizing, usually depending on member locations outside of the region (e.g. to copy "two" and "three", use the start of "four" to find the size): BUILD_BUG_ON((offsetof(struct foo, four) < offsetof(struct foo, two)) || (offsetof(struct foo, four) < offsetof(struct foo, three)); if (length > offsetof(struct foo, four) - offsetof(struct foo, two)) return -EINVAL; memcpy(&dst.two, &src.two, length); In order to have a regular programmatic way to describe a struct region that can be used for references and sizing, can be examined for bounds checking, avoids forcing the use of intermediate identifiers, and avoids polluting the global namespace, introduce the struct_group() macro. This macro wraps the member declarations to create an anonymous union of an anonymous struct (no intermediate name) and a named struct (for references and sizing): struct foo { int one; struct_group(thing, int two; int three, four; ); int five; }; if (length > sizeof(src.thing)) return -EINVAL; memcpy(&dst.thing, &src.thing, length); do_something(dst.three); There are some rare cases where the resulting struct_group() needs attributes added, so struct_group_attr() is also introduced to allow for specifying struct attributes (e.g. __align(x) or __packed). Additionally, there are places where such declarations would like to have the struct be tagged, so struct_group_tagged() is added. Given there is a need for a handful of UAPI uses too, the underlying __struct_group() macro has been defined in UAPI so it can be used there too. To avoid confusing scripts/kernel-doc, hide the macro from its struct parsing. Co-developed-by: Keith Packard Signed-off-by: Keith Packard Acked-by: Gustavo A. R. Silva Link: https://lore.kernel.org/lkml/20210728023217.GC35706@embeddedor Enhanced-by: Rasmus Villemoes Link: https://lore.kernel.org/lkml/41183a98-bdb9-4ad6-7eab-5a7292a6df84@rasmusvillemoes.dk Enhanced-by: Dan Williams Link: https://lore.kernel.org/lkml/1d9a2e6df2a9a35b2cdd50a9a68cac5991e7e5f0.camel@intel.com Enhanced-by: Daniel Vetter Link: https://lore.kernel.org/lkml/YQKa76A6XuFqgM03@phenom.ffwll.local Acked-by: Dan Williams Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- include/linux/stddef.h | 48 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/stddef.h | 21 ++++++++++++++++ scripts/kernel-doc | 7 ++++++ 3 files changed, 76 insertions(+) diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 998a4ba28eba40..938216f8ab7e7c 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -36,4 +36,52 @@ enum { #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) +/** + * struct_group() - Wrap a set of declarations in a mirrored struct + * + * @NAME: The identifier name of the mirrored sub-struct + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical + * layout and size: one anonymous and one named. The former can be + * used normally without sub-struct naming, and the latter can be + * used to reason about the start, end, and size of the group of + * struct members. + */ +#define struct_group(NAME, MEMBERS...) \ + __struct_group(/* no tag */, NAME, /* no attrs */, MEMBERS) + +/** + * struct_group_attr() - Create a struct_group() with trailing attributes + * + * @NAME: The identifier name of the mirrored sub-struct + * @ATTRS: Any struct attributes to apply + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical + * layout and size: one anonymous and one named. The former can be + * used normally without sub-struct naming, and the latter can be + * used to reason about the start, end, and size of the group of + * struct members. Includes structure attributes argument. + */ +#define struct_group_attr(NAME, ATTRS, MEMBERS...) \ + __struct_group(/* no tag */, NAME, ATTRS, MEMBERS) + +/** + * struct_group_tagged() - Create a struct_group with a reusable tag + * + * @TAG: The tag name for the named sub-struct + * @NAME: The identifier name of the mirrored sub-struct + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical + * layout and size: one anonymous and one named. The former can be + * used normally without sub-struct naming, and the latter can be + * used to reason about the start, end, and size of the group of + * struct members. Includes struct tag argument for the named copy, + * so the specified layout can be reused later. + */ +#define struct_group_tagged(TAG, NAME, MEMBERS...) \ + __struct_group(TAG, NAME, /* no attrs */, MEMBERS) + #endif diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index ee8220f8dcf5f5..610204f7c275d5 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -4,3 +4,24 @@ #ifndef __always_inline #define __always_inline inline #endif + +/** + * __struct_group() - Create a mirrored named and anonyomous struct + * + * @TAG: The tag name for the named sub-struct (usually empty) + * @NAME: The identifier name of the mirrored sub-struct + * @ATTRS: Any struct attributes (usually empty) + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical layout + * and size: one anonymous and one named. The former's members can be used + * normally without sub-struct naming, and the latter can be used to + * reason about the start, end, and size of the group of struct members. + * The named struct can also be explicitly tagged for layer reuse, as well + * as both having struct attributes appended. + */ +#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ + union { \ + struct { MEMBERS } ATTRS; \ + struct TAG { MEMBERS } ATTRS NAME; \ + } diff --git a/scripts/kernel-doc b/scripts/kernel-doc index cfcb6073795773..38aa799a776c0d 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1245,6 +1245,13 @@ sub dump_struct($$) { $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; $members =~ s/\s*____cacheline_aligned/ /gos; + # unwrap struct_group(): + # - first eat non-declaration parameters and rewrite for final match + # - then remove macro, outer parens, and trailing semicolon + $members =~ s/\bstruct_group\s*\(([^,]*,)/STRUCT_GROUP(/gos; + $members =~ s/\bstruct_group_(attr|tagged)\s*\(([^,]*,){2}/STRUCT_GROUP(/gos; + $members =~ s/\b__struct_group\s*\(([^,]*,){3}/STRUCT_GROUP(/gos; + $members =~ s/\bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;/$2/gos; my $args = qr{([^,)]+)}; # replace DECLARE_BITMAP From 2823225fbba0e0b822767784139c04b4a773fe15 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 Jan 2022 18:29:52 +0100 Subject: [PATCH 0336/1056] media: omap3isp: Use struct_group() for memcpy() region [ Upstream commit d4568fc8525897e683983806f813be1ae9eedaed ] In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. Wrap the target region in struct_group(). This additionally fixes a theoretical misalignment of the copy (since the size of "buf" changes between 64-bit and 32-bit, but this is likely never built for 64-bit). FWIW, I think this code is totally broken on 64-bit (which appears to not be a "real" build configuration): it would either always fail (with an uninitialized data->buf_size) or would cause corruption in userspace due to the copy_to_user() in the call path against an uninitialized data->buf value: omap3isp_stat_request_statistics_time32(...) struct omap3isp_stat_data data64; ... omap3isp_stat_request_statistics(stat, &data64); int omap3isp_stat_request_statistics(struct ispstat *stat, struct omap3isp_stat_data *data) ... buf = isp_stat_buf_get(stat, data); static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat, struct omap3isp_stat_data *data) ... if (buf->buf_size > data->buf_size) { ... return ERR_PTR(-EINVAL); } ... rval = copy_to_user(data->buf, buf->virt_addr, buf->buf_size); Regardless, additionally initialize data64 to be zero-filled to avoid undefined behavior. Link: https://lore.kernel.org/lkml/20211215220505.GB21862@embeddedor Cc: Arnd Bergmann Fixes: 378e3f81cb56 ("media: omap3isp: support 64-bit version of omap3isp_stat_data") Cc: stable@vger.kernel.org Reviewed-by: Gustavo A. R. Silva Signed-off-by: Kees Cook Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/omap3isp/ispstat.c | 5 +++-- include/uapi/linux/omap3isp.h | 21 +++++++++++++-------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/media/platform/omap3isp/ispstat.c b/drivers/media/platform/omap3isp/ispstat.c index 5b9b57f4d9bf83..68cf68dbcace28 100644 --- a/drivers/media/platform/omap3isp/ispstat.c +++ b/drivers/media/platform/omap3isp/ispstat.c @@ -512,7 +512,7 @@ int omap3isp_stat_request_statistics(struct ispstat *stat, int omap3isp_stat_request_statistics_time32(struct ispstat *stat, struct omap3isp_stat_data_time32 *data) { - struct omap3isp_stat_data data64; + struct omap3isp_stat_data data64 = { }; int ret; ret = omap3isp_stat_request_statistics(stat, &data64); @@ -521,7 +521,8 @@ int omap3isp_stat_request_statistics_time32(struct ispstat *stat, data->ts.tv_sec = data64.ts.tv_sec; data->ts.tv_usec = data64.ts.tv_usec; - memcpy(&data->buf, &data64.buf, sizeof(*data) - sizeof(data->ts)); + data->buf = (uintptr_t)data64.buf; + memcpy(&data->frame, &data64.frame, sizeof(data->frame)); return 0; } diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h index 87b55755f4ffe5..d9db7ad4389080 100644 --- a/include/uapi/linux/omap3isp.h +++ b/include/uapi/linux/omap3isp.h @@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config { * struct omap3isp_stat_data - Statistic data sent to or received from user * @ts: Timestamp of returned framestats. * @buf: Pointer to pass to user. + * @buf_size: Size of buffer. * @frame_number: Frame number of requested stats. * @cur_frame: Current frame number being processed. * @config_counter: Number of the configuration associated with the data. @@ -176,10 +177,12 @@ struct omap3isp_stat_data { struct timeval ts; #endif void __user *buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #ifdef __KERNEL__ @@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 { __s32 tv_usec; } ts; __u32 buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #endif From 6512c3c39cb6b573b791ce45365818a38b76afbe Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Dec 2021 15:20:24 +0100 Subject: [PATCH 0337/1056] media: davinci: vpif: fix use-after-free on driver unbind [ Upstream commit 43acb728bbc40169d2e2425e84a80068270974be ] The driver allocates and registers two platform device structures during probe, but the devices were never deregistered on driver unbind. This results in a use-after-free on driver unbind as the device structures were allocated using devres and would be freed by driver core when remove() returns. Fix this by adding the missing deregistration calls to the remove() callback and failing probe on registration errors. Note that the platform device structures must be freed using a proper release callback to avoid leaking associated resources like device names. Fixes: 479f7a118105 ("[media] davinci: vpif: adaptions for DT support") Cc: stable@vger.kernel.org # 4.12 Cc: Kevin Hilman Signed-off-by: Johan Hovold Reviewed-by: Lad Prabhakar Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/davinci/vpif.c | 97 ++++++++++++++++++++------- 1 file changed, 71 insertions(+), 26 deletions(-) diff --git a/drivers/media/platform/davinci/vpif.c b/drivers/media/platform/davinci/vpif.c index 5658c7f148d79a..8ffc01c606d0cf 100644 --- a/drivers/media/platform/davinci/vpif.c +++ b/drivers/media/platform/davinci/vpif.c @@ -41,6 +41,11 @@ MODULE_ALIAS("platform:" VPIF_DRIVER_NAME); #define VPIF_CH2_MAX_MODES 15 #define VPIF_CH3_MAX_MODES 2 +struct vpif_data { + struct platform_device *capture; + struct platform_device *display; +}; + DEFINE_SPINLOCK(vpif_lock); EXPORT_SYMBOL_GPL(vpif_lock); @@ -423,11 +428,19 @@ int vpif_channel_getfid(u8 channel_id) } EXPORT_SYMBOL(vpif_channel_getfid); +static void vpif_pdev_release(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + + kfree(pdev); +} + static int vpif_probe(struct platform_device *pdev) { static struct resource *res, *res_irq; struct platform_device *pdev_capture, *pdev_display; struct device_node *endpoint = NULL; + struct vpif_data *data; int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -435,6 +448,12 @@ static int vpif_probe(struct platform_device *pdev) if (IS_ERR(vpif_base)) return PTR_ERR(vpif_base); + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + platform_set_drvdata(pdev, data); + pm_runtime_enable(&pdev->dev); pm_runtime_get(&pdev->dev); @@ -462,49 +481,75 @@ static int vpif_probe(struct platform_device *pdev) goto err_put_rpm; } - pdev_capture = devm_kzalloc(&pdev->dev, sizeof(*pdev_capture), - GFP_KERNEL); - if (pdev_capture) { - pdev_capture->name = "vpif_capture"; - pdev_capture->id = -1; - pdev_capture->resource = res_irq; - pdev_capture->num_resources = 1; - pdev_capture->dev.dma_mask = pdev->dev.dma_mask; - pdev_capture->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask; - pdev_capture->dev.parent = &pdev->dev; - platform_device_register(pdev_capture); - } else { - dev_warn(&pdev->dev, "Unable to allocate memory for pdev_capture.\n"); + pdev_capture = kzalloc(sizeof(*pdev_capture), GFP_KERNEL); + if (!pdev_capture) { + ret = -ENOMEM; + goto err_put_rpm; } - pdev_display = devm_kzalloc(&pdev->dev, sizeof(*pdev_display), - GFP_KERNEL); - if (pdev_display) { - pdev_display->name = "vpif_display"; - pdev_display->id = -1; - pdev_display->resource = res_irq; - pdev_display->num_resources = 1; - pdev_display->dev.dma_mask = pdev->dev.dma_mask; - pdev_display->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask; - pdev_display->dev.parent = &pdev->dev; - platform_device_register(pdev_display); - } else { - dev_warn(&pdev->dev, "Unable to allocate memory for pdev_display.\n"); + pdev_capture->name = "vpif_capture"; + pdev_capture->id = -1; + pdev_capture->resource = res_irq; + pdev_capture->num_resources = 1; + pdev_capture->dev.dma_mask = pdev->dev.dma_mask; + pdev_capture->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask; + pdev_capture->dev.parent = &pdev->dev; + pdev_capture->dev.release = vpif_pdev_release; + + ret = platform_device_register(pdev_capture); + if (ret) + goto err_put_pdev_capture; + + pdev_display = kzalloc(sizeof(*pdev_display), GFP_KERNEL); + if (!pdev_display) { + ret = -ENOMEM; + goto err_put_pdev_capture; } + pdev_display->name = "vpif_display"; + pdev_display->id = -1; + pdev_display->resource = res_irq; + pdev_display->num_resources = 1; + pdev_display->dev.dma_mask = pdev->dev.dma_mask; + pdev_display->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask; + pdev_display->dev.parent = &pdev->dev; + pdev_display->dev.release = vpif_pdev_release; + + ret = platform_device_register(pdev_display); + if (ret) + goto err_put_pdev_display; + + data->capture = pdev_capture; + data->display = pdev_display; + return 0; +err_put_pdev_display: + platform_device_put(pdev_display); +err_put_pdev_capture: + platform_device_put(pdev_capture); err_put_rpm: pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); + kfree(data); return ret; } static int vpif_remove(struct platform_device *pdev) { + struct vpif_data *data = platform_get_drvdata(pdev); + + if (data->capture) + platform_device_unregister(data->capture); + if (data->display) + platform_device_unregister(data->display); + pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); + + kfree(data); + return 0; } From 9d721a17505b578cec77ada07a6e36d7ac5fc03c Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 24 Dec 2021 16:33:55 +0800 Subject: [PATCH 0338/1056] mt76: mt76_connac: fix MCU_CE_CMD_SET_ROC definition error [ Upstream commit bf9727a27442a50c75b7d99a5088330c578b2a42 ] Fixed an MCU_CE_CMD_SET_ROC definition error that occurred from a previous refactor work. Fixes: d0e274af2f2e4 ("mt76: mt76_connac: create mcu library") Signed-off-by: Sean Wang Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h index 77d4435e4581eb..72a70a7046fbc5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h @@ -556,7 +556,7 @@ enum { MCU_CMD_SET_BSS_CONNECTED = MCU_CE_PREFIX | 0x16, MCU_CMD_SET_BSS_ABORT = MCU_CE_PREFIX | 0x17, MCU_CMD_CANCEL_HW_SCAN = MCU_CE_PREFIX | 0x1b, - MCU_CMD_SET_ROC = MCU_CE_PREFIX | 0x1d, + MCU_CMD_SET_ROC = MCU_CE_PREFIX | 0x1c, MCU_CMD_SET_P2P_OPPPS = MCU_CE_PREFIX | 0x33, MCU_CMD_SET_RATE_TX_POWER = MCU_CE_PREFIX | 0x5d, MCU_CMD_SCHED_SCAN_ENABLE = MCU_CE_PREFIX | 0x61, From 8b5ce83872b4ceb2ff77076ccefbf5b5523c1626 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 31 Dec 2021 12:36:02 +0100 Subject: [PATCH 0339/1056] mt76: mt7921: do not always disable fw runtime-pm [ Upstream commit b44eeb8cbdf2b88f2844f11e4f263b0abed5b5b0 ] After commit 'd430dffbe9dd ("mt76: mt7921: fix a possible race enabling/disabling runtime-pm")', runtime-pm is always disabled in the fw even if the user requests to enable it toggling debugfs node since mt7921_pm_interface_iter routine will use pm->enable to configure the fw. Fix the issue moving enable variable configuration before running mt7921_pm_interface_iter routine. Fixes: d430dffbe9dd ("mt76: mt7921: fix a possible race enabling/disabling runtime-pm") Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c index b9f25599227d25..cfcf7964c68814 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c @@ -291,13 +291,12 @@ mt7921_pm_set(void *data, u64 val) pm->enable = false; mt76_connac_pm_wake(&dev->mphy, pm); + pm->enable = val; ieee80211_iterate_active_interfaces(mt76_hw(dev), IEEE80211_IFACE_ITER_RESUME_ALL, mt7921_pm_interface_iter, dev); mt76_connac_mcu_set_deep_sleep(&dev->mt76, pm->ds_enable); - - pm->enable = val; mt76_connac_power_save_sched(&dev->mphy, pm); out: mutex_unlock(&dev->mt76.mutex); From 518bb96367123062b48b0a9842f2864249b565f6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 16 Feb 2022 16:25:11 -0800 Subject: [PATCH 0340/1056] cxl/port: Hold port reference until decoder release [ Upstream commit 74be98774dfbc5b8b795db726bd772e735d2edd4 ] KASAN + DEBUG_KOBJECT_RELEASE reports a potential use-after-free in cxl_decoder_release() where it goes to reference its parent, a cxl_port, to free its id back to port->decoder_ida. BUG: KASAN: use-after-free in to_cxl_port+0x18/0x90 [cxl_core] Read of size 8 at addr ffff888119270908 by task kworker/35:2/379 CPU: 35 PID: 379 Comm: kworker/35:2 Tainted: G OE 5.17.0-rc2+ #198 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Workqueue: events kobject_delayed_cleanup Call Trace: dump_stack_lvl+0x59/0x73 print_address_description.constprop.0+0x1f/0x150 ? to_cxl_port+0x18/0x90 [cxl_core] kasan_report.cold+0x83/0xdf ? to_cxl_port+0x18/0x90 [cxl_core] to_cxl_port+0x18/0x90 [cxl_core] cxl_decoder_release+0x2a/0x60 [cxl_core] device_release+0x5f/0x100 kobject_cleanup+0x80/0x1c0 The device core only guarantees parent lifetime until all children are unregistered. If a child needs a parent to complete its ->release() callback that child needs to hold a reference to extend the lifetime of the parent. Fixes: 40ba17afdfab ("cxl/acpi: Introduce cxl_decoder objects") Reported-by: Ben Widawsky Tested-by: Ben Widawsky Reviewed-by: Ben Widawsky Link: https://lore.kernel.org/r/164505751190.4175768.13324905271463416712.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- drivers/cxl/core/bus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c index 267d8042bec22e..0987a6423ee061 100644 --- a/drivers/cxl/core/bus.c +++ b/drivers/cxl/core/bus.c @@ -182,6 +182,7 @@ static void cxl_decoder_release(struct device *dev) ida_free(&port->decoder_ida, cxld->id); kfree(cxld); + put_device(&port->dev); } static const struct device_type cxl_decoder_switch_type = { @@ -481,6 +482,9 @@ cxl_decoder_alloc(struct cxl_port *port, int nr_targets, resource_size_t base, if (rc < 0) goto err; + /* need parent to stick around to release the id */ + get_device(&port->dev); + *cxld = (struct cxl_decoder) { .id = rc, .range = { From 0855054fa8893ea64f75ee02b43198ed17d75c0a Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Thu, 23 Dec 2021 09:32:23 +0000 Subject: [PATCH 0341/1056] clk: renesas: r9a07g044: Update multiplier and divider values for PLL2/3 [ Upstream commit b289cdecc7c3e25e001cde260c882e4d9a8b0772 ] As per the HW manual (Rev.1.00 Sep, 2021) PLL2 and PLL3 should be 1600 MHz, but with current multiplier and divider values this resulted to 1596 MHz. This patch updates the multiplier and divider values for PLL2 and PLL3 so that we get the exact (1600 MHz) values. Fixes: 17f0ff3d49ff1 ("clk: renesas: Add support for R9A07G044 SoC") Suggested-by: Biju Das Signed-off-by: Lad Prabhakar Link: https://lore.kernel.org/r/20211223093223.4725-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- drivers/clk/renesas/r9a07g044-cpg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/renesas/r9a07g044-cpg.c b/drivers/clk/renesas/r9a07g044-cpg.c index 1490446985e2e8..61609eddf7d04f 100644 --- a/drivers/clk/renesas/r9a07g044-cpg.c +++ b/drivers/clk/renesas/r9a07g044-cpg.c @@ -61,8 +61,8 @@ static const struct cpg_core_clk r9a07g044_core_clks[] __initconst = { DEF_FIXED(".osc", R9A07G044_OSCCLK, CLK_EXTAL, 1, 1), DEF_FIXED(".osc_div1000", CLK_OSC_DIV1000, CLK_EXTAL, 1, 1000), DEF_SAMPLL(".pll1", CLK_PLL1, CLK_EXTAL, PLL146_CONF(0)), - DEF_FIXED(".pll2", CLK_PLL2, CLK_EXTAL, 133, 2), - DEF_FIXED(".pll3", CLK_PLL3, CLK_EXTAL, 133, 2), + DEF_FIXED(".pll2", CLK_PLL2, CLK_EXTAL, 200, 3), + DEF_FIXED(".pll3", CLK_PLL3, CLK_EXTAL, 200, 3), DEF_FIXED(".pll2_div2", CLK_PLL2_DIV2, CLK_PLL2, 1, 2), DEF_FIXED(".pll2_div16", CLK_PLL2_DIV16, CLK_PLL2, 1, 16), From eabbe74e7de5ee1841764e8937d3812bc4843eb5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 20 Nov 2021 01:50:08 +0000 Subject: [PATCH 0342/1056] KVM: x86/mmu: Use yield-safe TDP MMU root iter in MMU notifier unmapping [ Upstream commit 7533377215b6ee432c06c5855f6be5d66e694e46 ] Use the yield-safe variant of the TDP MMU iterator when handling an unmapping event from the MMU notifier, as most occurences of the event allow yielding. Fixes: e1eed5847b09 ("KVM: x86/mmu: Allow yielding during MMU notifier unmap/zap, if possible") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20211120015008.3780032-1-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 853780eb033b4a..6195f0d219ae06 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1100,7 +1100,7 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, { struct kvm_mmu_page *root; - for_each_tdp_mmu_root(kvm, root, range->slot->as_id) + for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false) flush = zap_gfn_range(kvm, root, range->start, range->end, range->may_block, flush, false); From 1e9d6854951a67d2df992b4d7b5a7e904cea4d3f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 15 Dec 2021 01:15:54 +0000 Subject: [PATCH 0343/1056] KVM: x86/mmu: Use common TDP MMU zap helper for MMU notifier unmap hook [ Upstream commit 83b83a02073ec8d18c77a9bbe0881d710f7a9d32 ] Use the common TDP MMU zap helper when handling an MMU notifier unmap event, the two flows are semantically identical. Consolidate the code in preparation for a future bug fix, as both kvm_tdp_mmu_unmap_gfn_range() and __kvm_tdp_mmu_zap_gfn_range() are guilty of not zapping SPTEs in invalid roots. No functional change intended. Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20211215011557.399940-2-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/mmu/tdp_mmu.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 6195f0d219ae06..6c2bb60ccd88b8 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1098,13 +1098,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, bool flush) { - struct kvm_mmu_page *root; - - for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false) - flush = zap_gfn_range(kvm, root, range->start, range->end, - range->may_block, flush, false); - - return flush; + return __kvm_tdp_mmu_zap_gfn_range(kvm, range->slot->as_id, range->start, + range->end, range->may_block, flush); } typedef bool (*tdp_handler_t)(struct kvm *kvm, struct tdp_iter *iter, From 241afac69b967673a59414bea10ea2024ce4315b Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Wed, 8 Sep 2021 09:46:20 -0700 Subject: [PATCH 0344/1056] scsi: qla2xxx: Move heartbeat handling from DPC thread to workqueue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3a4e1f3b3a3c733de3b82b9b522e54803e1165ae ] DPC thread gets restricted due to a no-op mailbox, which is a blocking call and has a high execution frequency. To free up the DPC thread we move no-op handling to the workqueue. Also, modified qla_do_heartbeat() to send no-op MBC if we don’t have any active interrupts, but there are still I/Os outstanding with firmware. Link: https://lore.kernel.org/r/20210908164622.19240-9-njavali@marvell.com Fixes: d94d8158e184 ("scsi: qla2xxx: Add heartbeat check") Reviewed-by: Himanshu Madhani Signed-off-by: Manish Rangankar Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_def.h | 4 +- drivers/scsi/qla2xxx/qla_init.c | 2 + drivers/scsi/qla2xxx/qla_os.c | 78 +++++++++++++++------------------ 3 files changed, 40 insertions(+), 44 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 2ea35e47ea4417..0589ab8e646710 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3759,6 +3759,7 @@ struct qla_qpair { struct qla_fw_resources fwres ____cacheline_aligned; u32 cmd_cnt; u32 cmd_completion_cnt; + u32 prev_completion_cnt; }; /* Place holder for FW buffer parameters */ @@ -4618,6 +4619,7 @@ struct qla_hw_data { struct qla_chip_state_84xx *cs84xx; struct isp_operations *isp_ops; struct workqueue_struct *wq; + struct work_struct heartbeat_work; struct qlfc_fw fw_buf; /* FCP_CMND priority support */ @@ -4719,7 +4721,6 @@ struct qla_hw_data { struct qla_hw_data_stat stat; pci_error_state_t pci_error_state; - u64 prev_cmd_cnt; struct dma_pool *purex_dma_pool; struct btree_head32 host_map; @@ -4865,7 +4866,6 @@ typedef struct scsi_qla_host { #define SET_ZIO_THRESHOLD_NEEDED 32 #define ISP_ABORT_TO_ROM 33 #define VPORT_DELETE 34 -#define HEARTBEAT_CHK 38 #define PROCESS_PUREX_IOCB 63 diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index af8df5a800c61c..c3ba2995209bd8 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -7096,12 +7096,14 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) ha->chip_reset++; ha->base_qpair->chip_reset = ha->chip_reset; ha->base_qpair->cmd_cnt = ha->base_qpair->cmd_completion_cnt = 0; + ha->base_qpair->prev_completion_cnt = 0; for (i = 0; i < ha->max_qpairs; i++) { if (ha->queue_pair_map[i]) { ha->queue_pair_map[i]->chip_reset = ha->base_qpair->chip_reset; ha->queue_pair_map[i]->cmd_cnt = ha->queue_pair_map[i]->cmd_completion_cnt = 0; + ha->base_qpair->prev_completion_cnt = 0; } } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 77c0bf06f162b0..b224326bacee04 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2779,6 +2779,16 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time) return atomic_read(&vha->loop_state) == LOOP_READY; } +static void qla_heartbeat_work_fn(struct work_struct *work) +{ + struct qla_hw_data *ha = container_of(work, + struct qla_hw_data, heartbeat_work); + struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); + + if (!ha->flags.mbox_busy && base_vha->flags.init_done) + qla_no_op_mb(base_vha); +} + static void qla2x00_iocb_work_fn(struct work_struct *work) { struct scsi_qla_host *vha = container_of(work, @@ -3217,6 +3227,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) host->transportt, sht->vendor_id); INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); + INIT_WORK(&ha->heartbeat_work, qla_heartbeat_work_fn); /* Set up the irqs */ ret = qla2x00_request_irqs(ha, rsp); @@ -7103,17 +7114,6 @@ qla2x00_do_dpc(void *data) qla2x00_lip_reset(base_vha); } - if (test_bit(HEARTBEAT_CHK, &base_vha->dpc_flags)) { - /* - * if there is a mb in progress then that's - * enough of a check to see if fw is still ticking. - */ - if (!ha->flags.mbox_busy && base_vha->flags.init_done) - qla_no_op_mb(base_vha); - - clear_bit(HEARTBEAT_CHK, &base_vha->dpc_flags); - } - ha->dpc_active = 0; end_loop: set_current_state(TASK_INTERRUPTIBLE); @@ -7172,57 +7172,51 @@ qla2x00_rst_aen(scsi_qla_host_t *vha) static bool qla_do_heartbeat(struct scsi_qla_host *vha) { - u64 cmd_cnt, prev_cmd_cnt; - bool do_hb = false; struct qla_hw_data *ha = vha->hw; - int i; + u32 cmpl_cnt; + u16 i; + bool do_heartbeat = false; - /* if cmds are still pending down in fw, then do hb */ - if (ha->base_qpair->cmd_cnt != ha->base_qpair->cmd_completion_cnt) { - do_hb = true; + /* + * Allow do_heartbeat only if we don’t have any active interrupts, + * but there are still IOs outstanding with firmware. + */ + cmpl_cnt = ha->base_qpair->cmd_completion_cnt; + if (cmpl_cnt == ha->base_qpair->prev_completion_cnt && + cmpl_cnt != ha->base_qpair->cmd_cnt) { + do_heartbeat = true; goto skip; } + ha->base_qpair->prev_completion_cnt = cmpl_cnt; for (i = 0; i < ha->max_qpairs; i++) { - if (ha->queue_pair_map[i] && - ha->queue_pair_map[i]->cmd_cnt != - ha->queue_pair_map[i]->cmd_completion_cnt) { - do_hb = true; - break; + if (ha->queue_pair_map[i]) { + cmpl_cnt = ha->queue_pair_map[i]->cmd_completion_cnt; + if (cmpl_cnt == ha->queue_pair_map[i]->prev_completion_cnt && + cmpl_cnt != ha->queue_pair_map[i]->cmd_cnt) { + do_heartbeat = true; + break; + } + ha->queue_pair_map[i]->prev_completion_cnt = cmpl_cnt; } } skip: - prev_cmd_cnt = ha->prev_cmd_cnt; - cmd_cnt = ha->base_qpair->cmd_cnt; - for (i = 0; i < ha->max_qpairs; i++) { - if (ha->queue_pair_map[i]) - cmd_cnt += ha->queue_pair_map[i]->cmd_cnt; - } - ha->prev_cmd_cnt = cmd_cnt; - - if (!do_hb && ((cmd_cnt - prev_cmd_cnt) > 50)) - /* - * IOs are completing before periodic hb check. - * IOs seems to be running, do hb for sanity check. - */ - do_hb = true; - - return do_hb; + return do_heartbeat; } static void qla_heart_beat(struct scsi_qla_host *vha) { + struct qla_hw_data *ha = vha->hw; + if (vha->vp_idx) return; if (vha->hw->flags.eeh_busy || qla2x00_chip_is_down(vha)) return; - if (qla_do_heartbeat(vha)) { - set_bit(HEARTBEAT_CHK, &vha->dpc_flags); - qla2xxx_wake_dpc(vha); - } + if (qla_do_heartbeat(vha)) + queue_work(ha->wq, &ha->heartbeat_work); } /************************************************************************** From d4510119911cde343e2f81dd78edbdd4188a3ddd Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 10 Mar 2022 01:25:59 -0800 Subject: [PATCH 0345/1056] scsi: qla2xxx: Fix laggy FC remote port session recovery [ Upstream commit 713b415726f100f6644971e75ebfe1edbef1a390 ] For session recovery, driver relies on the dpc thread to initiate certain operations. The dpc thread runs exclusively without the Mailbox interface being occupied. A recent code change for heartbeat check via mailbox cmd 0 is preventing the dpc thread from carrying out its operation. This patch allows the higher priority error recovery to run first before running the lower priority heartbeat check. Link: https://lore.kernel.org/r/20220310092604.22950-9-njavali@marvell.com Fixes: d94d8158e184 ("scsi: qla2xxx: Add heartbeat check") Cc: stable@vger.kernel.org Reviewed-by: Himanshu Madhani Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_def.h | 1 + drivers/scsi/qla2xxx/qla_os.c | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 0589ab8e646710..303ad60d1d49d5 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -4621,6 +4621,7 @@ struct qla_hw_data { struct workqueue_struct *wq; struct work_struct heartbeat_work; struct qlfc_fw fw_buf; + unsigned long last_heartbeat_run_jiffies; /* FCP_CMND priority support */ struct qla_fcp_prio_cfg *fcp_prio_cfg; diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index b224326bacee04..12958aea893f30 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -7205,7 +7205,7 @@ static bool qla_do_heartbeat(struct scsi_qla_host *vha) return do_heartbeat; } -static void qla_heart_beat(struct scsi_qla_host *vha) +static void qla_heart_beat(struct scsi_qla_host *vha, u16 dpc_started) { struct qla_hw_data *ha = vha->hw; @@ -7215,8 +7215,19 @@ static void qla_heart_beat(struct scsi_qla_host *vha) if (vha->hw->flags.eeh_busy || qla2x00_chip_is_down(vha)) return; - if (qla_do_heartbeat(vha)) + /* + * dpc thread cannot run if heartbeat is running at the same time. + * We also do not want to starve heartbeat task. Therefore, do + * heartbeat task at least once every 5 seconds. + */ + if (dpc_started && + time_before(jiffies, ha->last_heartbeat_run_jiffies + 5 * HZ)) + return; + + if (qla_do_heartbeat(vha)) { + ha->last_heartbeat_run_jiffies = jiffies; queue_work(ha->wq, &ha->heartbeat_work); + } } /************************************************************************** @@ -7407,6 +7418,8 @@ qla2x00_timer(struct timer_list *t) start_dpc++; } + /* borrowing w to signify dpc will run */ + w = 0; /* Schedule the DPC routine if needed */ if ((test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) || test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags) || @@ -7439,9 +7452,10 @@ qla2x00_timer(struct timer_list *t) test_bit(RELOGIN_NEEDED, &vha->dpc_flags), test_bit(PROCESS_PUREX_IOCB, &vha->dpc_flags)); qla2xxx_wake_dpc(vha); + w = 1; } - qla_heart_beat(vha); + qla_heart_beat(vha, w); qla2x00_restart_timer(vha, WATCH_INTERVAL); } From 72806635ee63072aa93d84c2df75ca22a5c3d7fe Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 26 Oct 2021 04:54:06 -0700 Subject: [PATCH 0346/1056] scsi: qla2xxx: edif: Replace list_for_each_safe with list_for_each_entry_safe [ Upstream commit 8062b742d3bd336ca10ab5a1db1629d33700f9c6 ] This patch is per review comment by Hannes Reinecke from previous submission to replace list_for_each_safe with list_for_each_entry_safe. Link: https://lore.kernel.org/r/20211026115412.27691-8-njavali@marvell.com Reviewed-by: Hannes Reinecke Reviewed-by: Himanshu Madhani Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_edif.c | 39 ++++++++------------------------- drivers/scsi/qla2xxx/qla_edif.h | 1 - drivers/scsi/qla2xxx/qla_os.c | 8 +++---- 3 files changed, 13 insertions(+), 35 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c index a00fe88c60218c..e40b9cc382146b 100644 --- a/drivers/scsi/qla2xxx/qla_edif.c +++ b/drivers/scsi/qla2xxx/qla_edif.c @@ -1684,41 +1684,25 @@ static struct enode * qla_enode_find(scsi_qla_host_t *vha, uint32_t ntype, uint32_t p1, uint32_t p2) { struct enode *node_rtn = NULL; - struct enode *list_node = NULL; + struct enode *list_node, *q; unsigned long flags; - struct list_head *pos, *q; uint32_t sid; - uint32_t rw_flag; struct purexevent *purex; /* secure the list from moving under us */ spin_lock_irqsave(&vha->pur_cinfo.pur_lock, flags); - list_for_each_safe(pos, q, &vha->pur_cinfo.head) { - list_node = list_entry(pos, struct enode, list); + list_for_each_entry_safe(list_node, q, &vha->pur_cinfo.head, list) { /* node type determines what p1 and p2 are */ purex = &list_node->u.purexinfo; sid = p1; - rw_flag = p2; if (purex->pur_info.pur_sid.b24 == sid) { - if (purex->pur_info.pur_pend == 1 && - rw_flag == PUR_GET) { - /* - * if the receive is in progress - * and its a read/get then can't - * transfer yet - */ - ql_dbg(ql_dbg_edif, vha, 0x9106, - "%s purex xfer in progress for sid=%x\n", - __func__, sid); - } else { - /* found it and its complete */ - node_rtn = list_node; - list_del(pos); - break; - } + /* found it and its complete */ + node_rtn = list_node; + list_del(&list_node->list); + break; } } @@ -2428,7 +2412,6 @@ void qla24xx_auth_els(scsi_qla_host_t *vha, void **pkt, struct rsp_que **rsp) purex = &ptr->u.purexinfo; purex->pur_info.pur_sid = a.did; - purex->pur_info.pur_pend = 0; purex->pur_info.pur_bytes_rcvd = totlen; purex->pur_info.pur_rx_xchg_address = le32_to_cpu(p->rx_xchg_addr); purex->pur_info.pur_nphdl = le16_to_cpu(p->nport_handle); @@ -3180,18 +3163,14 @@ static uint16_t qla_edif_sadb_get_sa_index(fc_port_t *fcport, /* release any sadb entries -- only done at teardown */ void qla_edif_sadb_release(struct qla_hw_data *ha) { - struct list_head *pos; - struct list_head *tmp; - struct edif_sa_index_entry *entry; + struct edif_sa_index_entry *entry, *tmp; - list_for_each_safe(pos, tmp, &ha->sadb_rx_index_list) { - entry = list_entry(pos, struct edif_sa_index_entry, next); + list_for_each_entry_safe(entry, tmp, &ha->sadb_rx_index_list, next) { list_del(&entry->next); kfree(entry); } - list_for_each_safe(pos, tmp, &ha->sadb_tx_index_list) { - entry = list_entry(pos, struct edif_sa_index_entry, next); + list_for_each_entry_safe(entry, tmp, &ha->sadb_tx_index_list, next) { list_del(&entry->next); kfree(entry); } diff --git a/drivers/scsi/qla2xxx/qla_edif.h b/drivers/scsi/qla2xxx/qla_edif.h index 45cf87e3377807..32800bfb32a380 100644 --- a/drivers/scsi/qla2xxx/qla_edif.h +++ b/drivers/scsi/qla2xxx/qla_edif.h @@ -101,7 +101,6 @@ struct dinfo { }; struct pur_ninfo { - unsigned int pur_pend:1; port_id_t pur_sid; port_id_t pur_did; uint8_t vp_idx; diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 12958aea893f30..c7ab8a8be24c72 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3886,13 +3886,13 @@ qla2x00_remove_one(struct pci_dev *pdev) static inline void qla24xx_free_purex_list(struct purex_list *list) { - struct list_head *item, *next; + struct purex_item *item, *next; ulong flags; spin_lock_irqsave(&list->lock, flags); - list_for_each_safe(item, next, &list->head) { - list_del(item); - kfree(list_entry(item, struct purex_item, list)); + list_for_each_entry_safe(item, next, &list->head, list) { + list_del(&item->list); + kfree(item); } spin_unlock_irqrestore(&list->lock, flags); } From 9b7eb92dac240ab3bc83e188d83a3df834b41eb2 Mon Sep 17 00:00:00 2001 From: Arun Easi Date: Thu, 10 Mar 2022 01:25:56 -0800 Subject: [PATCH 0347/1056] scsi: qla2xxx: Fix crash during module load unload test [ Upstream commit 0972252450f90db56dd5415a20e2aec21a08d036 ] During purex packet handling the driver was incorrectly freeing a pre-allocated structure. Fix this by skipping that entry. System crashed with the following stack during a module unload test. Call Trace: sbitmap_init_node+0x7f/0x1e0 sbitmap_queue_init_node+0x24/0x150 blk_mq_init_bitmaps+0x3d/0xa0 blk_mq_init_tags+0x68/0x90 blk_mq_alloc_map_and_rqs+0x44/0x120 blk_mq_alloc_set_map_and_rqs+0x63/0x150 blk_mq_alloc_tag_set+0x11b/0x230 scsi_add_host_with_dma.cold+0x3f/0x245 qla2x00_probe_one+0xd5a/0x1b80 [qla2xxx] Call Trace with slub_debug and debug kernel: kasan_report_invalid_free+0x50/0x80 __kasan_slab_free+0x137/0x150 slab_free_freelist_hook+0xc6/0x190 kfree+0xe8/0x2e0 qla2x00_free_device+0x3bb/0x5d0 [qla2xxx] qla2x00_remove_one+0x668/0xcf0 [qla2xxx] Link: https://lore.kernel.org/r/20220310092604.22950-6-njavali@marvell.com Fixes: 62e9dd177732 ("scsi: qla2xxx: Change in PUREX to handle FPIN ELS requests") Cc: stable@vger.kernel.org Reported-by: Marco Patalano Tested-by: Marco Patalano Reviewed-by: Himanshu Madhani Signed-off-by: Arun Easi Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_os.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index c7ab8a8be24c72..e683b1c01c9f52 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3892,6 +3892,8 @@ qla24xx_free_purex_list(struct purex_list *list) spin_lock_irqsave(&list->lock, flags); list_for_each_entry_safe(item, next, &list->head, list) { list_del(&item->list); + if (item == &item->vha->default_item) + continue; kfree(item); } spin_unlock_irqrestore(&list->lock, flags); From 008e29d172ca0a4f2f7147b64b06bbaa537500f8 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Mar 2022 14:47:24 +0100 Subject: [PATCH 0348/1056] gfs2: Fix gfs2_file_buffered_write endless loop workaround [ Upstream commit 46f3e0421ccb5474b5c006b0089b9dfd42534bb6 ] Since commit 554c577cee95b, gfs2_file_buffered_write() can accidentally return a truncated iov_iter, which might confuse callers. Fix that. Fixes: 554c577cee95b ("gfs2: Prevent endless loops in gfs2_file_buffered_write") Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 60390f9dc31f41..e93185d804e0aa 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1086,6 +1086,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, gfs2_holder_uninit(gh); if (statfs_gh) kfree(statfs_gh); + from->count = orig_count - read; return read ? read : ret; } From 09674bfd8054b6d3667f7db8489551d3c8c1e3d9 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 21 Mar 2022 16:13:03 +0200 Subject: [PATCH 0349/1056] vdpa/mlx5: Avoid processing works if workqueue was destroyed [ Upstream commit ad6dc1daaf29f97f23cc810d60ee01c0e83f4c6b ] If mlx5_vdpa gets unloaded while a VM is running, the workqueue will be destroyed. However, vhost might still have reference to the kick function and might attempt to push new works. This could lead to null pointer dereference. To fix this, set mvdev->wq to NULL just before destroying and verify that the workqueue is not NULL in mlx5_vdpa_kick_vq before attempting to push a new work. Fixes: 5262912ef3cf ("vdpa/mlx5: Add support for control VQ and MAC setting") Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220321141303.9586-1-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 174895372e7f38..467a349dc26c0d 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1641,7 +1641,7 @@ static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) return; if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { - if (!mvdev->cvq.ready) + if (!mvdev->wq || !mvdev->cvq.ready) return; queue_work(mvdev->wq, &ndev->cvq_ent.work); @@ -2626,9 +2626,12 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); struct mlx5_vdpa_dev *mvdev = to_mvdev(dev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct workqueue_struct *wq; mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); - destroy_workqueue(mvdev->wq); + wq = mvdev->wq; + mvdev->wq = NULL; + destroy_workqueue(wq); _vdpa_unregister_device(dev); mgtdev->ndev = NULL; } From 5578b681fbf2b22d61189a2539efd3009518b328 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Oct 2021 16:12:42 -0400 Subject: [PATCH 0350/1056] btrfs: handle device lookup with btrfs_dev_lookup_args [ Upstream commit 562d7b1512f7369a19bca2883e2e8672d78f0481 ] We have a lot of device lookup functions that all do something slightly different. Clean this up by adding a struct to hold the different lookup criteria, and then pass this around to btrfs_find_device() so it can do the proper matching based on the lookup criteria. Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/dev-replace.c | 16 +++--- fs/btrfs/ioctl.c | 13 +++-- fs/btrfs/scrub.c | 6 +- fs/btrfs/volumes.c | 122 +++++++++++++++++++++++++---------------- fs/btrfs/volumes.h | 20 ++++++- 5 files changed, 112 insertions(+), 65 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index bdbc310a8f8c5a..781556e2a37f27 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -70,6 +70,7 @@ static int btrfs_dev_replace_kthread(void *data); int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) { + struct btrfs_dev_lookup_args args = { .devid = BTRFS_DEV_REPLACE_DEVID }; struct btrfs_key key; struct btrfs_root *dev_root = fs_info->dev_root; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; @@ -100,8 +101,7 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) * We don't have a replace item or it's corrupted. If there is * a replace target, fail the mount. */ - if (btrfs_find_device(fs_info->fs_devices, - BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) { + if (btrfs_find_device(fs_info->fs_devices, &args)) { btrfs_err(fs_info, "found replace target device without a valid replace item"); ret = -EUCLEAN; @@ -163,8 +163,7 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) * We don't have an active replace item but if there is a * replace target, fail the mount. */ - if (btrfs_find_device(fs_info->fs_devices, - BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) { + if (btrfs_find_device(fs_info->fs_devices, &args)) { btrfs_err(fs_info, "replace devid present without an active replace item"); ret = -EUCLEAN; @@ -175,11 +174,10 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) break; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: - dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices, - src_devid, NULL, NULL); - dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices, - BTRFS_DEV_REPLACE_DEVID, - NULL, NULL); + dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices, &args); + args.devid = src_devid; + dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices, &args); + /* * allow 'btrfs dev replace_cancel' if src/tgt device is * missing diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a37ab3e89a3bcc..4951a2ab88ddc0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1657,6 +1657,7 @@ static int exclop_start_or_cancel_reloc(struct btrfs_fs_info *fs_info, static noinline int btrfs_ioctl_resize(struct file *file, void __user *arg) { + BTRFS_DEV_LOOKUP_ARGS(args); struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); u64 new_size; @@ -1712,7 +1713,8 @@ static noinline int btrfs_ioctl_resize(struct file *file, btrfs_info(fs_info, "resizing devid %llu", devid); } - device = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL); + args.devid = devid; + device = btrfs_find_device(fs_info->fs_devices, &args); if (!device) { btrfs_info(fs_info, "resizer unable to find device %llu", devid); @@ -3375,22 +3377,21 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info, static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, void __user *arg) { + BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_ioctl_dev_info_args *di_args; struct btrfs_device *dev; int ret = 0; - char *s_uuid = NULL; di_args = memdup_user(arg, sizeof(*di_args)); if (IS_ERR(di_args)) return PTR_ERR(di_args); + args.devid = di_args->devid; if (!btrfs_is_empty_uuid(di_args->uuid)) - s_uuid = di_args->uuid; + args.uuid = di_args->uuid; rcu_read_lock(); - dev = btrfs_find_device(fs_info->fs_devices, di_args->devid, s_uuid, - NULL); - + dev = btrfs_find_device(fs_info->fs_devices, &args); if (!dev) { ret = -ENODEV; goto out; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 62f4bafbe54bb2..6f2787b21530ff 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4068,6 +4068,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, u64 end, struct btrfs_scrub_progress *progress, int readonly, int is_dev_replace) { + struct btrfs_dev_lookup_args args = { .devid = devid }; struct scrub_ctx *sctx; int ret; struct btrfs_device *dev; @@ -4115,7 +4116,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, goto out_free_ctx; mutex_lock(&fs_info->fs_devices->device_list_mutex); - dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL); + dev = btrfs_find_device(fs_info->fs_devices, &args); if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) && !is_dev_replace)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); @@ -4288,11 +4289,12 @@ int btrfs_scrub_cancel_dev(struct btrfs_device *dev) int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, struct btrfs_scrub_progress *progress) { + struct btrfs_dev_lookup_args args = { .devid = devid }; struct btrfs_device *dev; struct scrub_ctx *sctx = NULL; mutex_lock(&fs_info->fs_devices->device_list_mutex); - dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL); + dev = btrfs_find_device(fs_info->fs_devices, &args); if (dev) sctx = dev->scrub_ctx; if (sctx) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index fa68efd7e61063..53417a1c54021d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -844,9 +844,13 @@ static noinline struct btrfs_device *device_list_add(const char *path, device = NULL; } else { + struct btrfs_dev_lookup_args args = { + .devid = devid, + .uuid = disk_super->dev_item.uuid, + }; + mutex_lock(&fs_devices->device_list_mutex); - device = btrfs_find_device(fs_devices, devid, - disk_super->dev_item.uuid, NULL); + device = btrfs_find_device(fs_devices, &args); /* * If this disk has been pulled into an fs devices created by @@ -2360,10 +2364,9 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) static struct btrfs_device *btrfs_find_device_by_path( struct btrfs_fs_info *fs_info, const char *device_path) { + BTRFS_DEV_LOOKUP_ARGS(args); int ret = 0; struct btrfs_super_block *disk_super; - u64 devid; - u8 *dev_uuid; struct block_device *bdev; struct btrfs_device *device; @@ -2372,14 +2375,14 @@ static struct btrfs_device *btrfs_find_device_by_path( if (ret) return ERR_PTR(ret); - devid = btrfs_stack_device_id(&disk_super->dev_item); - dev_uuid = disk_super->dev_item.uuid; + args.devid = btrfs_stack_device_id(&disk_super->dev_item); + args.uuid = disk_super->dev_item.uuid; if (btrfs_fs_incompat(fs_info, METADATA_UUID)) - device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid, - disk_super->metadata_uuid); + args.fsid = disk_super->metadata_uuid; else - device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid, - disk_super->fsid); + args.fsid = disk_super->fsid; + + device = btrfs_find_device(fs_info->fs_devices, &args); btrfs_release_disk_super(disk_super); if (!device) @@ -2395,11 +2398,12 @@ struct btrfs_device *btrfs_find_device_by_devspec( struct btrfs_fs_info *fs_info, u64 devid, const char *device_path) { + BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_device *device; if (devid) { - device = btrfs_find_device(fs_info->fs_devices, devid, NULL, - NULL); + args.devid = devid; + device = btrfs_find_device(fs_info->fs_devices, &args); if (!device) return ERR_PTR(-ENOENT); return device; @@ -2409,14 +2413,11 @@ struct btrfs_device *btrfs_find_device_by_devspec( return ERR_PTR(-EINVAL); if (strcmp(device_path, "missing") == 0) { - /* Find first missing device */ - list_for_each_entry(device, &fs_info->fs_devices->devices, - dev_list) { - if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, - &device->dev_state) && !device->bdev) - return device; - } - return ERR_PTR(-ENOENT); + args.missing = true; + device = btrfs_find_device(fs_info->fs_devices, &args); + if (!device) + return ERR_PTR(-ENOENT); + return device; } return btrfs_find_device_by_path(fs_info, device_path); @@ -2496,6 +2497,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info) */ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) { + BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = fs_info->chunk_root; struct btrfs_path *path; @@ -2505,7 +2507,6 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) struct btrfs_key key; u8 fs_uuid[BTRFS_FSID_SIZE]; u8 dev_uuid[BTRFS_UUID_SIZE]; - u64 devid; int ret; path = btrfs_alloc_path(); @@ -2544,13 +2545,14 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); - devid = btrfs_device_id(leaf, dev_item); + args.devid = btrfs_device_id(leaf, dev_item); read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), BTRFS_FSID_SIZE); - device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid, - fs_uuid); + args.uuid = dev_uuid; + args.fsid = fs_uuid; + device = btrfs_find_device(fs_info->fs_devices, &args); BUG_ON(!device); /* Logic error */ if (device->fs_devices->seeding) { @@ -6805,6 +6807,33 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, return BLK_STS_OK; } +static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args, + const struct btrfs_fs_devices *fs_devices) +{ + if (args->fsid == NULL) + return true; + if (memcmp(fs_devices->metadata_uuid, args->fsid, BTRFS_FSID_SIZE) == 0) + return true; + return false; +} + +static bool dev_args_match_device(const struct btrfs_dev_lookup_args *args, + const struct btrfs_device *device) +{ + ASSERT((args->devid != (u64)-1) || args->missing); + + if ((args->devid != (u64)-1) && device->devid != args->devid) + return false; + if (args->uuid && memcmp(device->uuid, args->uuid, BTRFS_UUID_SIZE) != 0) + return false; + if (!args->missing) + return true; + if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) && + !device->bdev) + return true; + return false; +} + /* * Find a device specified by @devid or @uuid in the list of @fs_devices, or * return NULL. @@ -6812,31 +6841,25 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, * If devid and uuid are both specified, the match must be exact, otherwise * only devid is used. */ -struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices, - u64 devid, u8 *uuid, u8 *fsid) +struct btrfs_device *btrfs_find_device(const struct btrfs_fs_devices *fs_devices, + const struct btrfs_dev_lookup_args *args) { struct btrfs_device *device; struct btrfs_fs_devices *seed_devs; - if (!fsid || !memcmp(fs_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) { + if (dev_args_match_fs_devices(args, fs_devices)) { list_for_each_entry(device, &fs_devices->devices, dev_list) { - if (device->devid == devid && - (!uuid || memcmp(device->uuid, uuid, - BTRFS_UUID_SIZE) == 0)) + if (dev_args_match_device(args, device)) return device; } } list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) { - if (!fsid || - !memcmp(seed_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE)) { - list_for_each_entry(device, &seed_devs->devices, - dev_list) { - if (device->devid == devid && - (!uuid || memcmp(device->uuid, uuid, - BTRFS_UUID_SIZE) == 0)) - return device; - } + if (!dev_args_match_fs_devices(args, seed_devs)) + continue; + list_for_each_entry(device, &seed_devs->devices, dev_list) { + if (dev_args_match_device(args, device)) + return device; } } @@ -7002,6 +7025,7 @@ static void warn_32bit_meta_chunk(struct btrfs_fs_info *fs_info, static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) { + BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_fs_info *fs_info = leaf->fs_info; struct extent_map_tree *map_tree = &fs_info->mapping_tree; struct map_lookup *map; @@ -7079,11 +7103,12 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, map->stripes[i].physical = btrfs_stripe_offset_nr(leaf, chunk, i); devid = btrfs_stripe_devid_nr(leaf, chunk, i); + args.devid = devid; read_extent_buffer(leaf, uuid, (unsigned long) btrfs_stripe_dev_uuid_nr(chunk, i), BTRFS_UUID_SIZE); - map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices, - devid, uuid, NULL); + args.uuid = uuid; + map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices, &args); if (!map->stripes[i].dev && !btrfs_test_opt(fs_info, DEGRADED)) { free_extent_map(em); @@ -7201,6 +7226,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info, static int read_one_dev(struct extent_buffer *leaf, struct btrfs_dev_item *dev_item) { + BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; @@ -7209,11 +7235,13 @@ static int read_one_dev(struct extent_buffer *leaf, u8 fs_uuid[BTRFS_FSID_SIZE]; u8 dev_uuid[BTRFS_UUID_SIZE]; - devid = btrfs_device_id(leaf, dev_item); + devid = args.devid = btrfs_device_id(leaf, dev_item); read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), BTRFS_FSID_SIZE); + args.uuid = dev_uuid; + args.fsid = fs_uuid; if (memcmp(fs_uuid, fs_devices->metadata_uuid, BTRFS_FSID_SIZE)) { fs_devices = open_seed_devices(fs_info, fs_uuid); @@ -7221,8 +7249,7 @@ static int read_one_dev(struct extent_buffer *leaf, return PTR_ERR(fs_devices); } - device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid, - fs_uuid); + device = btrfs_find_device(fs_info->fs_devices, &args); if (!device) { if (!btrfs_test_opt(fs_info, DEGRADED)) { btrfs_report_missing_device(fs_info, devid, @@ -7899,12 +7926,14 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_get_dev_stats *stats) { + BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_device *dev; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; int i; mutex_lock(&fs_devices->device_list_mutex); - dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL); + args.devid = stats->devid; + dev = btrfs_find_device(fs_info->fs_devices, &args); mutex_unlock(&fs_devices->device_list_mutex); if (!dev) { @@ -7980,6 +8009,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, u64 chunk_offset, u64 devid, u64 physical_offset, u64 physical_len) { + struct btrfs_dev_lookup_args args = { .devid = devid }; struct extent_map_tree *em_tree = &fs_info->mapping_tree; struct extent_map *em; struct map_lookup *map; @@ -8035,7 +8065,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, } /* Make sure no dev extent is beyond device boundary */ - dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL); + dev = btrfs_find_device(fs_info->fs_devices, &args); if (!dev) { btrfs_err(fs_info, "failed to find devid %llu", devid); ret = -EUCLEAN; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d86a6f9f166cd8..f3b1380f45ada7 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -418,6 +418,22 @@ struct btrfs_balance_control { struct btrfs_balance_progress stat; }; +/* + * Search for a given device by the set parameters + */ +struct btrfs_dev_lookup_args { + u64 devid; + u8 *uuid; + u8 *fsid; + bool missing; +}; + +/* We have to initialize to -1 because BTRFS_DEV_REPLACE_DEVID is 0 */ +#define BTRFS_DEV_LOOKUP_ARGS_INIT { .devid = (u64)-1 } + +#define BTRFS_DEV_LOOKUP_ARGS(name) \ + struct btrfs_dev_lookup_args name = BTRFS_DEV_LOOKUP_ARGS_INIT + enum btrfs_map_op { BTRFS_MAP_READ, BTRFS_MAP_WRITE, @@ -482,8 +498,8 @@ void __exit btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); int btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 new_size); -struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices, - u64 devid, u8 *uuid, u8 *fsid); +struct btrfs_device *btrfs_find_device(const struct btrfs_fs_devices *fs_devices, + const struct btrfs_dev_lookup_args *args); int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path); int btrfs_balance(struct btrfs_fs_info *fs_info, From 321a81835b4aed4f717f89921286f6544ffa8be9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Oct 2021 16:12:43 -0400 Subject: [PATCH 0351/1056] btrfs: add a btrfs_get_dev_args_from_path helper [ Upstream commit faa775c41d655a4786e9d53cb075a77bb5a75f66 ] We are going to want to populate our device lookup args outside of any locks and then do the actual device lookup later, so add a helper to do this work and make btrfs_find_device_by_devspec() use this helper for now. Reviewed-by: Nikolay Borisov Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 96 ++++++++++++++++++++++++++++++---------------- fs/btrfs/volumes.h | 4 ++ 2 files changed, 68 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 53417a1c54021d..8d09e6d442b26a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2361,45 +2361,81 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) btrfs_free_device(tgtdev); } -static struct btrfs_device *btrfs_find_device_by_path( - struct btrfs_fs_info *fs_info, const char *device_path) +/** + * Populate args from device at path + * + * @fs_info: the filesystem + * @args: the args to populate + * @path: the path to the device + * + * This will read the super block of the device at @path and populate @args with + * the devid, fsid, and uuid. This is meant to be used for ioctls that need to + * lookup a device to operate on, but need to do it before we take any locks. + * This properly handles the special case of "missing" that a user may pass in, + * and does some basic sanity checks. The caller must make sure that @path is + * properly NUL terminated before calling in, and must call + * btrfs_put_dev_args_from_path() in order to free up the temporary fsid and + * uuid buffers. + * + * Return: 0 for success, -errno for failure + */ +int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, + struct btrfs_dev_lookup_args *args, + const char *path) { - BTRFS_DEV_LOOKUP_ARGS(args); - int ret = 0; struct btrfs_super_block *disk_super; struct block_device *bdev; - struct btrfs_device *device; + int ret; - ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ, - fs_info->bdev_holder, 0, &bdev, &disk_super); - if (ret) - return ERR_PTR(ret); + if (!path || !path[0]) + return -EINVAL; + if (!strcmp(path, "missing")) { + args->missing = true; + return 0; + } + + args->uuid = kzalloc(BTRFS_UUID_SIZE, GFP_KERNEL); + args->fsid = kzalloc(BTRFS_FSID_SIZE, GFP_KERNEL); + if (!args->uuid || !args->fsid) { + btrfs_put_dev_args_from_path(args); + return -ENOMEM; + } - args.devid = btrfs_stack_device_id(&disk_super->dev_item); - args.uuid = disk_super->dev_item.uuid; + ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0, + &bdev, &disk_super); + if (ret) + return ret; + args->devid = btrfs_stack_device_id(&disk_super->dev_item); + memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE); if (btrfs_fs_incompat(fs_info, METADATA_UUID)) - args.fsid = disk_super->metadata_uuid; + memcpy(args->fsid, disk_super->metadata_uuid, BTRFS_FSID_SIZE); else - args.fsid = disk_super->fsid; - - device = btrfs_find_device(fs_info->fs_devices, &args); - + memcpy(args->fsid, disk_super->fsid, BTRFS_FSID_SIZE); btrfs_release_disk_super(disk_super); - if (!device) - device = ERR_PTR(-ENOENT); blkdev_put(bdev, FMODE_READ); - return device; + return 0; } /* - * Lookup a device given by device id, or the path if the id is 0. + * Only use this jointly with btrfs_get_dev_args_from_path() because we will + * allocate our ->uuid and ->fsid pointers, everybody else uses local variables + * that don't need to be freed. */ +void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args) +{ + kfree(args->uuid); + kfree(args->fsid); + args->uuid = NULL; + args->fsid = NULL; +} + struct btrfs_device *btrfs_find_device_by_devspec( struct btrfs_fs_info *fs_info, u64 devid, const char *device_path) { BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_device *device; + int ret; if (devid) { args.devid = devid; @@ -2409,18 +2445,14 @@ struct btrfs_device *btrfs_find_device_by_devspec( return device; } - if (!device_path || !device_path[0]) - return ERR_PTR(-EINVAL); - - if (strcmp(device_path, "missing") == 0) { - args.missing = true; - device = btrfs_find_device(fs_info->fs_devices, &args); - if (!device) - return ERR_PTR(-ENOENT); - return device; - } - - return btrfs_find_device_by_path(fs_info, device_path); + ret = btrfs_get_dev_args_from_path(fs_info, &args, device_path); + if (ret) + return ERR_PTR(ret); + device = btrfs_find_device(fs_info->fs_devices, &args); + btrfs_put_dev_args_from_path(&args); + if (!device) + return ERR_PTR(-ENOENT); + return device; } /* diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index f3b1380f45ada7..d1df03f77e298b 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -487,9 +487,13 @@ void btrfs_assign_next_active_device(struct btrfs_device *device, struct btrfs_device *btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, u64 devid, const char *devpath); +int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, + struct btrfs_dev_lookup_args *args, + const char *path); struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, const u64 *devid, const u8 *uuid); +void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args); void btrfs_free_device(struct btrfs_device *device); int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, u64 devid, From f75534a71abf01e46a06aacff9535baf7c9e9b96 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Oct 2021 16:12:44 -0400 Subject: [PATCH 0352/1056] btrfs: use btrfs_get_dev_args_from_path in dev removal ioctls [ Upstream commit 1a15eb724aaef8656f8cc01d9355797cfe7c618e ] For device removal and replace we call btrfs_find_device_by_devspec, which if we give it a device path and nothing else will call btrfs_get_dev_args_from_path, which opens the block device and reads the super block and then looks up our device based on that. However at this point we're holding the sb write "lock", so reading the block device pulls in the dependency of ->open_mutex, which produces the following lockdep splat ====================================================== WARNING: possible circular locking dependency detected 5.14.0-rc2+ #405 Not tainted ------------------------------------------------------ losetup/11576 is trying to acquire lock: ffff9bbe8cded938 ((wq_completion)loop0){+.+.}-{0:0}, at: flush_workqueue+0x67/0x5e0 but task is already holding lock: ffff9bbe88e4fc68 (&lo->lo_mutex){+.+.}-{3:3}, at: __loop_clr_fd+0x41/0x660 [loop] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 (&lo->lo_mutex){+.+.}-{3:3}: __mutex_lock+0x7d/0x750 lo_open+0x28/0x60 [loop] blkdev_get_whole+0x25/0xf0 blkdev_get_by_dev.part.0+0x168/0x3c0 blkdev_open+0xd2/0xe0 do_dentry_open+0x161/0x390 path_openat+0x3cc/0xa20 do_filp_open+0x96/0x120 do_sys_openat2+0x7b/0x130 __x64_sys_openat+0x46/0x70 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #3 (&disk->open_mutex){+.+.}-{3:3}: __mutex_lock+0x7d/0x750 blkdev_get_by_dev.part.0+0x56/0x3c0 blkdev_get_by_path+0x98/0xa0 btrfs_get_bdev_and_sb+0x1b/0xb0 btrfs_find_device_by_devspec+0x12b/0x1c0 btrfs_rm_device+0x127/0x610 btrfs_ioctl+0x2a31/0x2e70 __x64_sys_ioctl+0x80/0xb0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #2 (sb_writers#12){.+.+}-{0:0}: lo_write_bvec+0xc2/0x240 [loop] loop_process_work+0x238/0xd00 [loop] process_one_work+0x26b/0x560 worker_thread+0x55/0x3c0 kthread+0x140/0x160 ret_from_fork+0x1f/0x30 -> #1 ((work_completion)(&lo->rootcg_work)){+.+.}-{0:0}: process_one_work+0x245/0x560 worker_thread+0x55/0x3c0 kthread+0x140/0x160 ret_from_fork+0x1f/0x30 -> #0 ((wq_completion)loop0){+.+.}-{0:0}: __lock_acquire+0x10ea/0x1d90 lock_acquire+0xb5/0x2b0 flush_workqueue+0x91/0x5e0 drain_workqueue+0xa0/0x110 destroy_workqueue+0x36/0x250 __loop_clr_fd+0x9a/0x660 [loop] block_ioctl+0x3f/0x50 __x64_sys_ioctl+0x80/0xb0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae other info that might help us debug this: Chain exists of: (wq_completion)loop0 --> &disk->open_mutex --> &lo->lo_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&lo->lo_mutex); lock(&disk->open_mutex); lock(&lo->lo_mutex); lock((wq_completion)loop0); *** DEADLOCK *** 1 lock held by losetup/11576: #0: ffff9bbe88e4fc68 (&lo->lo_mutex){+.+.}-{3:3}, at: __loop_clr_fd+0x41/0x660 [loop] stack backtrace: CPU: 0 PID: 11576 Comm: losetup Not tainted 5.14.0-rc2+ #405 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 Call Trace: dump_stack_lvl+0x57/0x72 check_noncircular+0xcf/0xf0 ? stack_trace_save+0x3b/0x50 __lock_acquire+0x10ea/0x1d90 lock_acquire+0xb5/0x2b0 ? flush_workqueue+0x67/0x5e0 ? lockdep_init_map_type+0x47/0x220 flush_workqueue+0x91/0x5e0 ? flush_workqueue+0x67/0x5e0 ? verify_cpu+0xf0/0x100 drain_workqueue+0xa0/0x110 destroy_workqueue+0x36/0x250 __loop_clr_fd+0x9a/0x660 [loop] ? blkdev_ioctl+0x8d/0x2a0 block_ioctl+0x3f/0x50 __x64_sys_ioctl+0x80/0xb0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f31b02404cb Instead what we want to do is populate our device lookup args before we grab any locks, and then pass these args into btrfs_rm_device(). From there we can find the device and do the appropriate removal. Suggested-by: Anand Jain Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/ioctl.c | 67 +++++++++++++++++++++++++++------------------- fs/btrfs/volumes.c | 15 +++++------ fs/btrfs/volumes.h | 2 +- 3 files changed, 48 insertions(+), 36 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4951a2ab88ddc0..4317720a29e855 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3218,6 +3218,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg) static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) { + BTRFS_DEV_LOOKUP_ARGS(args); struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ioctl_vol_args_v2 *vol_args; @@ -3229,35 +3230,39 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - ret = mnt_want_write_file(file); - if (ret) - return ret; - vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) { ret = PTR_ERR(vol_args); - goto err_drop; + goto out; } if (vol_args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) { ret = -EOPNOTSUPP; goto out; } + vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; - if (!(vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) && - strcmp("cancel", vol_args->name) == 0) + if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) { + args.devid = vol_args->devid; + } else if (!strcmp("cancel", vol_args->name)) { cancel = true; + } else { + ret = btrfs_get_dev_args_from_path(fs_info, &args, vol_args->name); + if (ret) + goto out; + } + + ret = mnt_want_write_file(file); + if (ret) + goto out; ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE, cancel); if (ret) - goto out; - /* Exclusive operation is now claimed */ + goto err_drop; - if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) - ret = btrfs_rm_device(fs_info, NULL, vol_args->devid, &bdev, &mode); - else - ret = btrfs_rm_device(fs_info, vol_args->name, 0, &bdev, &mode); + /* Exclusive operation is now claimed */ + ret = btrfs_rm_device(fs_info, &args, &bdev, &mode); btrfs_exclop_finish(fs_info); @@ -3269,17 +3274,19 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) btrfs_info(fs_info, "device deleted: %s", vol_args->name); } -out: - kfree(vol_args); err_drop: mnt_drop_write_file(file); if (bdev) blkdev_put(bdev, mode); +out: + btrfs_put_dev_args_from_path(&args); + kfree(vol_args); return ret; } static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) { + BTRFS_DEV_LOOKUP_ARGS(args); struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ioctl_vol_args *vol_args; @@ -3291,32 +3298,38 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - ret = mnt_want_write_file(file); - if (ret) - return ret; - vol_args = memdup_user(arg, sizeof(*vol_args)); - if (IS_ERR(vol_args)) { - ret = PTR_ERR(vol_args); - goto out_drop_write; - } + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); + vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; - cancel = (strcmp("cancel", vol_args->name) == 0); + if (!strcmp("cancel", vol_args->name)) { + cancel = true; + } else { + ret = btrfs_get_dev_args_from_path(fs_info, &args, vol_args->name); + if (ret) + goto out; + } + + ret = mnt_want_write_file(file); + if (ret) + goto out; ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE, cancel); if (ret == 0) { - ret = btrfs_rm_device(fs_info, vol_args->name, 0, &bdev, &mode); + ret = btrfs_rm_device(fs_info, &args, &bdev, &mode); if (!ret) btrfs_info(fs_info, "disk deleted %s", vol_args->name); btrfs_exclop_finish(fs_info); } - kfree(vol_args); -out_drop_write: mnt_drop_write_file(file); if (bdev) blkdev_put(bdev, mode); +out: + btrfs_put_dev_args_from_path(&args); + kfree(vol_args); return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8d09e6d442b26a..3bd68f1b79e6f2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2120,8 +2120,9 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, update_dev_time(device_path); } -int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, - u64 devid, struct block_device **bdev, fmode_t *mode) +int btrfs_rm_device(struct btrfs_fs_info *fs_info, + struct btrfs_dev_lookup_args *args, + struct block_device **bdev, fmode_t *mode) { struct btrfs_device *device; struct btrfs_fs_devices *cur_devices; @@ -2140,14 +2141,12 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, if (ret) goto out; - device = btrfs_find_device_by_devspec(fs_info, devid, device_path); - - if (IS_ERR(device)) { - if (PTR_ERR(device) == -ENOENT && - device_path && strcmp(device_path, "missing") == 0) + device = btrfs_find_device(fs_info->fs_devices, args); + if (!device) { + if (args->missing) ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND; else - ret = PTR_ERR(device); + ret = -ENOENT; goto out; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d1df03f77e298b..30288b728bbbdc 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -496,7 +496,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args); void btrfs_free_device(struct btrfs_device *device); int btrfs_rm_device(struct btrfs_fs_info *fs_info, - const char *device_path, u64 devid, + struct btrfs_dev_lookup_args *args, struct block_device **bdev, fmode_t *mode); void __exit btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); From 72fa2ea3e0ab859f627ee0bf9b93ca9ba64df95e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 8 Mar 2022 13:36:38 +0800 Subject: [PATCH 0353/1056] btrfs: remove device item and update super block in the same transaction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bbac58698a55cc0a6f0c0d69a6dcd3f9f3134c11 ] [BUG] There is a report that a btrfs has a bad super block num devices. This makes btrfs to reject the fs completely. BTRFS error (device sdd3): super_num_devices 3 mismatch with num_devices 2 found here BTRFS error (device sdd3): failed to read chunk tree: -22 BTRFS error (device sdd3): open_ctree failed [CAUSE] During btrfs device removal, chunk tree and super block num devs are updated in two different transactions: btrfs_rm_device() |- btrfs_rm_dev_item(device) | |- trans = btrfs_start_transaction() | | Now we got transaction X | | | |- btrfs_del_item() | | Now device item is removed from chunk tree | | | |- btrfs_commit_transaction() | Transaction X got committed, super num devs untouched, | but device item removed from chunk tree. | (AKA, super num devs is already incorrect) | |- cur_devices->num_devices--; |- cur_devices->total_devices--; |- btrfs_set_super_num_devices() All those operations are not in transaction X, thus it will only be written back to disk in next transaction. So after the transaction X in btrfs_rm_dev_item() committed, but before transaction X+1 (which can be minutes away), a power loss happen, then we got the super num mismatch. [FIX] Instead of starting and committing a transaction inside btrfs_rm_dev_item(), start a transaction in side btrfs_rm_device() and pass it to btrfs_rm_dev_item(). And only commit the transaction after everything is done. Reported-by: Luca Béla Palkovics Link: https://lore.kernel.org/linux-btrfs/CA+8xDSpvdm_U0QLBAnrH=zqDq_cWCOH5TiV46CKmp3igr44okQ@mail.gmail.com/ CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 65 ++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 37 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3bd68f1b79e6f2..cec54c6e1cdd6d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1942,23 +1942,18 @@ static void update_dev_time(const char *device_path) path_put(&path); } -static int btrfs_rm_dev_item(struct btrfs_device *device) +static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans, + struct btrfs_device *device) { struct btrfs_root *root = device->fs_info->chunk_root; int ret; struct btrfs_path *path; struct btrfs_key key; - struct btrfs_trans_handle *trans; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) { - btrfs_free_path(path); - return PTR_ERR(trans); - } key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.type = BTRFS_DEV_ITEM_KEY; key.offset = device->devid; @@ -1969,21 +1964,12 @@ static int btrfs_rm_dev_item(struct btrfs_device *device) if (ret) { if (ret > 0) ret = -ENOENT; - btrfs_abort_transaction(trans, ret); - btrfs_end_transaction(trans); goto out; } ret = btrfs_del_item(trans, root, path); - if (ret) { - btrfs_abort_transaction(trans, ret); - btrfs_end_transaction(trans); - } - out: btrfs_free_path(path); - if (!ret) - ret = btrfs_commit_transaction(trans); return ret; } @@ -2124,6 +2110,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, struct block_device **bdev, fmode_t *mode) { + struct btrfs_trans_handle *trans; struct btrfs_device *device; struct btrfs_fs_devices *cur_devices; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; @@ -2139,7 +2126,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1); if (ret) - goto out; + return ret; device = btrfs_find_device(fs_info->fs_devices, args); if (!device) { @@ -2147,27 +2134,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND; else ret = -ENOENT; - goto out; + return ret; } if (btrfs_pinned_by_swapfile(fs_info, device)) { btrfs_warn_in_rcu(fs_info, "cannot remove device %s (devid %llu) due to active swapfile", rcu_str_deref(device->name), device->devid); - ret = -ETXTBSY; - goto out; + return -ETXTBSY; } - if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { - ret = BTRFS_ERROR_DEV_TGT_REPLACE; - goto out; - } + if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) + return BTRFS_ERROR_DEV_TGT_REPLACE; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && - fs_info->fs_devices->rw_devices == 1) { - ret = BTRFS_ERROR_DEV_ONLY_WRITABLE; - goto out; - } + fs_info->fs_devices->rw_devices == 1) + return BTRFS_ERROR_DEV_ONLY_WRITABLE; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { mutex_lock(&fs_info->chunk_mutex); @@ -2182,14 +2164,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, if (ret) goto error_undo; - /* - * TODO: the superblock still includes this device in its num_devices - * counter although write_all_supers() is not locked out. This - * could give a filesystem state which requires a degraded mount. - */ - ret = btrfs_rm_dev_item(device); - if (ret) + trans = btrfs_start_transaction(fs_info->chunk_root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); goto error_undo; + } + + ret = btrfs_rm_dev_item(trans, device); + if (ret) { + /* Any error in dev item removal is critical */ + btrfs_crit(fs_info, + "failed to remove device item for devid %llu: %d", + device->devid, ret); + btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); + return ret; + } clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); btrfs_scrub_cancel_dev(device); @@ -2264,7 +2254,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, free_fs_devices(cur_devices); } -out: + ret = btrfs_commit_transaction(trans); + return ret; error_undo: @@ -2276,7 +2267,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, device->fs_devices->rw_devices++; mutex_unlock(&fs_info->chunk_mutex); } - goto out; + return ret; } void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev) From dbbcf21ad6a877caae6506ca54aa74967e6f0dbd Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:00:59 -0700 Subject: [PATCH 0354/1056] drbd: add error handling support for add_disk() [ Upstream commit e92ab4eda516a5bfd96c087282ebe9521deba4f4 ] We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8ba2fe356f01a5..ae6a136d278e8c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2798,7 +2798,9 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig goto out_idr_remove_vol; } - add_disk(disk); + err = add_disk(disk); + if (err) + goto out_cleanup_disk; /* inherit the connection state */ device->state.conn = first_connection(resource)->cstate; @@ -2812,6 +2814,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_debugfs_device_add(device); return NO_ERROR; +out_cleanup_disk: + blk_cleanup_disk(disk); out_idr_remove_vol: idr_remove(&connection->peer_devices, vnr); out_idr_remove_from_resource: From 5bb1df0bfd4640b47dc78290e2c6415cb79fc89b Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Thu, 4 Nov 2021 16:07:09 +0800 Subject: [PATCH 0355/1056] drbd: Fix double free problem in drbd_create_device [ Upstream commit 27548088ac628109f70eb0b1eb521d035844dba8 ] In drbd_create_device(), the 'out_no_io_page' lable has called blk_cleanup_disk() when return failed. So remove the 'out_cleanup_disk' lable to avoid double free the disk pointer. Fixes: e92ab4eda516 ("drbd: add error handling support for add_disk()") Signed-off-by: Wu Bo Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/1636013229-26309-1-git-send-email-wubo40@huawei.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index ae6a136d278e8c..b91d2a9dc23826 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2800,7 +2800,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig err = add_disk(disk); if (err) - goto out_cleanup_disk; + goto out_idr_remove_vol; /* inherit the connection state */ device->state.conn = first_connection(resource)->cstate; @@ -2814,8 +2814,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_debugfs_device_add(device); return NO_ERROR; -out_cleanup_disk: - blk_cleanup_disk(disk); out_idr_remove_vol: idr_remove(&connection->peer_devices, vnr); out_idr_remove_from_resource: From f799df4569c139d320b18fc1e152c96ba4c89223 Mon Sep 17 00:00:00 2001 From: Xiaomeng Tong Date: Wed, 6 Apr 2022 21:04:44 +0200 Subject: [PATCH 0356/1056] drbd: fix an invalid memory access caused by incorrect use of list iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ae4d37b5df749926891583d42a6801b5da11e3c1 ] The bug is here: idr_remove(&connection->peer_devices, vnr); If the previous for_each_connection() don't exit early (no goto hit inside the loop), the iterator 'connection' after the loop will be a bogus pointer to an invalid structure object containing the HEAD (&resource->connections). As a result, the use of 'connection' above will lead to a invalid memory access (including a possible invalid free as idr_remove could call free_layer). The original intention should have been to remove all peer_devices, but the following lines have already done the work. So just remove this line and the unneeded label, to fix this bug. Cc: stable@vger.kernel.org Fixes: c06ece6ba6f1b ("drbd: Turn connection->volumes into connection->peer_devices") Signed-off-by: Xiaomeng Tong Reviewed-by: Christoph Böhmwalder Reviewed-by: Lars Ellenberg Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b91d2a9dc23826..d59af26d770326 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2795,12 +2795,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig if (init_submitter(device)) { err = ERR_NOMEM; - goto out_idr_remove_vol; + goto out_idr_remove_from_resource; } err = add_disk(disk); if (err) - goto out_idr_remove_vol; + goto out_idr_remove_from_resource; /* inherit the connection state */ device->state.conn = first_connection(resource)->cstate; @@ -2814,8 +2814,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_debugfs_device_add(device); return NO_ERROR; -out_idr_remove_vol: - idr_remove(&connection->peer_devices, vnr); out_idr_remove_from_resource: for_each_connection(connection, resource) { peer_device = idr_remove(&connection->peer_devices, vnr); From f276634b12fa8f63988be9cf5492c7d60d5ad7b1 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Wed, 11 Aug 2021 11:38:44 -0400 Subject: [PATCH 0357/1056] drm/amd/display: Set min dcfclk if pipe count is 0 [ Upstream commit bc204778b4032b336cb3bde85bea852d79e7e389 ] [WHY] Clocks don't get recalculated in 0 stream/0 pipe configs, blocking S0i3 if dcfclk gets high enough [HOW] Create DCN31 copy of DCN30 bandwidth validation func which doesn't entirely skip validation in 0 pipe scenarios Override dcfclk to vlevel 0/min value during validation if pipe count is 0 Reviewed-by: Eric Yang Acked-by: Qingqing Zhuo Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../drm/amd/display/dc/dcn30/dcn30_resource.c | 2 +- .../drm/amd/display/dc/dcn30/dcn30_resource.h | 7 +++ .../drm/amd/display/dc/dcn31/dcn31_resource.c | 63 ++++++++++++++++++- 3 files changed, 70 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 0294d0cc475952..735c92a5aa36a1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1856,7 +1856,7 @@ static struct pipe_ctx *dcn30_find_split_pipe( return pipe; } -static noinline bool dcn30_internal_validate_bw( +noinline bool dcn30_internal_validate_bw( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h index b754b89beadfb9..b92e4cc0232f2d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h @@ -55,6 +55,13 @@ unsigned int dcn30_calc_max_scaled_time( bool dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); +bool dcn30_internal_validate_bw( + struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *vlevel_out, + bool fast_validate); void dcn30_calculate_wm_and_dlg( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index b60ab3cc0f1193..7aadb35a30793a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1664,6 +1664,15 @@ static void dcn31_calculate_wm_and_dlg_fp( if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk) dcfclk = context->bw_ctx.dml.soc.min_dcfclk; + /* We don't recalculate clocks for 0 pipe configs, which can block + * S0i3 as high clocks will block low power states + * Override any clocks that can block S0i3 to min here + */ + if (pipe_cnt == 0) { + context->bw_ctx.bw.dcn.clk.dcfclk_khz = dcfclk; // always should be vlevel 0 + return; + } + pipes[0].clks_cfg.voltage = vlevel; pipes[0].clks_cfg.dcfclk_mhz = dcfclk; pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; @@ -1789,6 +1798,58 @@ static void dcn31_calculate_wm_and_dlg( DC_FP_END(); } +bool dcn31_validate_bandwidth(struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool out = false; + + BW_VAL_TRACE_SETUP(); + + int vlevel = 0; + int pipe_cnt = 0; + display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); + DC_LOGGER_INIT(dc->ctx->logger); + + BW_VAL_TRACE_COUNT(); + + out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); + + // Disable fast_validate to set min dcfclk in alculate_wm_and_dlg + if (pipe_cnt == 0) + fast_validate = false; + + if (!out) + goto validate_fail; + + BW_VAL_TRACE_END_VOLTAGE_LEVEL(); + + if (fast_validate) { + BW_VAL_TRACE_SKIP(fast); + goto validate_out; + } + + dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); + + BW_VAL_TRACE_END_WATERMARKS(); + + goto validate_out; + +validate_fail: + DC_LOG_WARNING("Mode Validation Warning: %s failed alidation.\n", + dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); + + BW_VAL_TRACE_SKIP(fail); + out = false; + +validate_out: + kfree(pipes); + + BW_VAL_TRACE_FINISH(); + + return out; +} + static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; @@ -1871,7 +1932,7 @@ static struct resource_funcs dcn31_res_pool_funcs = { .link_encs_assign = link_enc_cfg_link_encs_assign, .link_enc_unassign = link_enc_cfg_link_enc_unassign, .panel_cntl_create = dcn31_panel_cntl_create, - .validate_bandwidth = dcn30_validate_bandwidth, + .validate_bandwidth = dcn31_validate_bandwidth, .calculate_wm_and_dlg = dcn31_calculate_wm_and_dlg, .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, .populate_dml_pipes = dcn31_populate_dml_pipes_from_context, From 59bf2aca4b1c3eca28b337b5e797bb9b43d44f3b Mon Sep 17 00:00:00 2001 From: CHANDAN VURDIGERE NATARAJ Date: Tue, 29 Mar 2022 13:10:31 +0530 Subject: [PATCH 0358/1056] drm/amd/display: Fix by adding FPU protection for dcn30_internal_validate_bw [ Upstream commit 50e6cb3fd2cde554db646282ea10df7236e6493c ] [Why] Below general protection fault observed when WebGL Aquarium is run for longer duration. If drm debug logs are enabled and set to 0x1f then the issue is observed within 10 minutes of run. [ 100.717056] general protection fault, probably for non-canonical address 0x2d33302d32323032: 0000 [#1] PREEMPT SMP NOPTI [ 100.727921] CPU: 3 PID: 1906 Comm: DrmThread Tainted: G W 5.15.30 #12 d726c6a2d6ebe5cf9223931cbca6892f916fe18b [ 100.754419] RIP: 0010:CalculateSwathWidth+0x1f7/0x44f [ 100.767109] Code: 00 00 00 f2 42 0f 11 04 f0 48 8b 85 88 00 00 00 f2 42 0f 10 04 f0 48 8b 85 98 00 00 00 f2 42 0f 11 04 f0 48 8b 45 10 0f 57 c0 42 0f 2a 04 b0 0f 57 c9 f3 43 0f 2a 0c b4 e8 8c e2 f3 ff 48 8b [ 100.781269] RSP: 0018:ffffa9230079eeb0 EFLAGS: 00010246 [ 100.812528] RAX: 2d33302d32323032 RBX: 0000000000000500 RCX: 0000000000000000 [ 100.819656] RDX: 0000000000000001 RSI: ffff99deb712c49c RDI: 0000000000000000 [ 100.826781] RBP: ffffa9230079ef50 R08: ffff99deb712460c R09: ffff99deb712462c [ 100.833907] R10: ffff99deb7124940 R11: ffff99deb7124d70 R12: ffff99deb712ae44 [ 100.841033] R13: 0000000000000001 R14: 0000000000000000 R15: ffffa9230079f0a0 [ 100.848159] FS: 00007af121212640(0000) GS:ffff99deba780000(0000) knlGS:0000000000000000 [ 100.856240] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 100.861980] CR2: 0000209000fe1000 CR3: 000000011b18c000 CR4: 0000000000350ee0 [ 100.869106] Call Trace: [ 100.871555] [ 100.873655] ? asm_sysvec_reschedule_ipi+0x12/0x20 [ 100.878449] CalculateSwathAndDETConfiguration+0x1a3/0x6dd [ 100.883937] dml31_ModeSupportAndSystemConfigurationFull+0x2ce4/0x76da [ 100.890467] ? kallsyms_lookup_buildid+0xc8/0x163 [ 100.895173] ? kallsyms_lookup_buildid+0xc8/0x163 [ 100.899874] ? __sprint_symbol+0x80/0x135 [ 100.903883] ? dm_update_plane_state+0x3f9/0x4d2 [ 100.908500] ? symbol_string+0xb7/0xde [ 100.912250] ? number+0x145/0x29b [ 100.915566] ? vsnprintf+0x341/0x5ff [ 100.919141] ? desc_read_finalized_seq+0x39/0x87 [ 100.923755] ? update_load_avg+0x1b9/0x607 [ 100.927849] ? compute_mst_dsc_configs_for_state+0x7d/0xd5b [ 100.933416] ? fetch_pipe_params+0xa4d/0xd0c [ 100.937686] ? dc_fpu_end+0x3d/0xa8 [ 100.941175] dml_get_voltage_level+0x16b/0x180 [ 100.945619] dcn30_internal_validate_bw+0x10e/0x89b [ 100.950495] ? dcn31_validate_bandwidth+0x68/0x1fc [ 100.955285] ? resource_build_scaling_params+0x98b/0xb8c [ 100.960595] ? dcn31_validate_bandwidth+0x68/0x1fc [ 100.965384] dcn31_validate_bandwidth+0x9a/0x1fc [ 100.970001] dc_validate_global_state+0x238/0x295 [ 100.974703] amdgpu_dm_atomic_check+0x9c1/0xbce [ 100.979235] ? _printk+0x59/0x73 [ 100.982467] drm_atomic_check_only+0x403/0x78b [ 100.986912] drm_mode_atomic_ioctl+0x49b/0x546 [ 100.991358] ? drm_ioctl+0x1c1/0x3b3 [ 100.994936] ? drm_atomic_set_property+0x92a/0x92a [ 100.999725] drm_ioctl_kernel+0xdc/0x149 [ 101.003648] drm_ioctl+0x27f/0x3b3 [ 101.007051] ? drm_atomic_set_property+0x92a/0x92a [ 101.011842] amdgpu_drm_ioctl+0x49/0x7d [ 101.015679] __se_sys_ioctl+0x7c/0xb8 [ 101.015685] do_syscall_64+0x5f/0xb8 [ 101.015690] ? __irq_exit_rcu+0x34/0x96 [How] It calles populate_dml_pipes which uses doubles to initialize. Adding FPU protection avoids context switch and probable loss of vba context as there is potential contention while drm debug logs are enabled. Signed-off-by: CHANDAN VURDIGERE NATARAJ Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 7aadb35a30793a..e224c521325811 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1813,7 +1813,9 @@ bool dcn31_validate_bandwidth(struct dc *dc, BW_VAL_TRACE_COUNT(); + DC_FP_START(); out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); + DC_FP_END(); // Disable fast_validate to set min dcfclk in alculate_wm_and_dlg if (pipe_cnt == 0) From c2a9881bc2cac1a79393d44466e4c1d8ad478fc0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 28 Dec 2021 14:26:03 -0500 Subject: [PATCH 0359/1056] NFSD: De-duplicate net_generic(nf->nf_net, nfsd_net_id) [ Upstream commit 2c445a0e72cb1fbfbdb7f9473c53556ee27c1d90 ] Since this pointer is used repeatedly, move it to a stack variable. Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/vfs.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 5f62fa0963ced7..c8e3f81d110eaf 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1121,6 +1121,7 @@ __be32 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, unsigned long count, __be32 *verf) { + struct nfsd_net *nn; struct nfsd_file *nf; loff_t end = LLONG_MAX; __be32 err = nfserr_inval; @@ -1137,6 +1138,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf); if (err) goto out; + nn = net_generic(nf->nf_net, nfsd_net_id); if (EX_ISSYNC(fhp->fh_export)) { errseq_t since = READ_ONCE(nf->nf_file->f_wb_err); int err2; @@ -1144,8 +1146,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); switch (err2) { case 0: - nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, - nfsd_net_id)); + nfsd_copy_boot_verifier(verf, nn); err2 = filemap_check_wb_err(nf->nf_file->f_mapping, since); err = nfserrno(err2); @@ -1154,13 +1155,11 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfserr_notsupp; break; default: - nfsd_reset_boot_verifier(net_generic(nf->nf_net, - nfsd_net_id)); + nfsd_reset_boot_verifier(nn); err = nfserrno(err2); } } else - nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, - nfsd_net_id)); + nfsd_copy_boot_verifier(verf, nn); nfsd_file_put(nf); out: From d6f1651ddf9176b32335dc069c61e0dc0d600ce1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 24 Jan 2022 15:50:31 -0500 Subject: [PATCH 0360/1056] NFSD: COMMIT operations must not return NFS?ERR_INVAL [ Upstream commit 3f965021c8bc38965ecb1924f570c4842b33d408 ] Since, well, forever, the Linux NFS server's nfsd_commit() function has returned nfserr_inval when the passed-in byte range arguments were non-sensical. However, according to RFC 1813 section 3.3.21, NFSv3 COMMIT requests are permitted to return only the following non-zero status codes: NFS3ERR_IO NFS3ERR_STALE NFS3ERR_BADHANDLE NFS3ERR_SERVERFAULT NFS3ERR_INVAL is not included in that list. Likewise, NFS4ERR_INVAL is not listed in the COMMIT row of Table 6 in RFC 8881. RFC 7530 does permit COMMIT to return NFS4ERR_INVAL, but does not specify when it can or should be used. Instead of dropping or failing a COMMIT request in a byte range that is not supported, turn it into a valid request by treating one or both arguments as zero. Offset zero means start-of-file, count zero means until-end-of-file, so we only ever extend the commit range. NFS servers are always allowed to commit more and sooner than requested. The range check is no longer bounded by NFS_OFFSET_MAX, but rather by the value that is returned in the maxfilesize field of the NFSv3 FSINFO procedure or the NFSv4 maxfilesize file attribute. Note that this change results in a new pynfs failure: CMT4 st_commit.testCommitOverflow : RUNNING CMT4 st_commit.testCommitOverflow : FAILURE COMMIT with offset + count overflow should return NFS4ERR_INVAL, instead got NFS4_OK IMO the test is not correct as written: RFC 8881 does not allow the COMMIT operation to return NFS4ERR_INVAL. Reported-by: Dan Aloni Cc: stable@vger.kernel.org Signed-off-by: Chuck Lever Reviewed-by: Bruce Fields Signed-off-by: Sasha Levin --- fs/nfsd/nfs3proc.c | 6 ------ fs/nfsd/vfs.c | 53 +++++++++++++++++++++++++++++++--------------- fs/nfsd/vfs.h | 4 ++-- 3 files changed, 38 insertions(+), 25 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index b540489ea240d7..936eebd4c56dca 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -660,15 +660,9 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) argp->count, (unsigned long long) argp->offset); - if (argp->offset > NFS_OFFSET_MAX) { - resp->status = nfserr_inval; - goto out; - } - fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count, resp->verf); -out: return rpc_success; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c8e3f81d110eaf..abfbb6953e89a0 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1108,42 +1108,61 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, } #ifdef CONFIG_NFSD_V3 -/* - * Commit all pending writes to stable storage. +/** + * nfsd_commit - Commit pending writes to stable storage + * @rqstp: RPC request being processed + * @fhp: NFS filehandle + * @offset: raw offset from beginning of file + * @count: raw count of bytes to sync + * @verf: filled in with the server's current write verifier * - * Note: we only guarantee that data that lies within the range specified - * by the 'offset' and 'count' parameters will be synced. + * Note: we guarantee that data that lies within the range specified + * by the 'offset' and 'count' parameters will be synced. The server + * is permitted to sync data that lies outside this range at the + * same time. * * Unfortunately we cannot lock the file to make sure we return full WCC * data to the client, as locking happens lower down in the filesystem. + * + * Return values: + * An nfsstat value in network byte order. */ __be32 -nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, - loff_t offset, unsigned long count, __be32 *verf) +nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset, + u32 count, __be32 *verf) { + u64 maxbytes; + loff_t start, end; struct nfsd_net *nn; struct nfsd_file *nf; - loff_t end = LLONG_MAX; - __be32 err = nfserr_inval; - - if (offset < 0) - goto out; - if (count != 0) { - end = offset + (loff_t)count - 1; - if (end < offset) - goto out; - } + __be32 err; err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf); if (err) goto out; + + /* + * Convert the client-provided (offset, count) range to a + * (start, end) range. If the client-provided range falls + * outside the maximum file size of the underlying FS, + * clamp the sync range appropriately. + */ + start = 0; + end = LLONG_MAX; + maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes; + if (offset < maxbytes) { + start = offset; + if (count && (offset + count - 1 < maxbytes)) + end = offset + count - 1; + } + nn = net_generic(nf->nf_net, nfsd_net_id); if (EX_ISSYNC(fhp->fh_export)) { errseq_t since = READ_ONCE(nf->nf_file->f_wb_err); int err2; - err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); + err2 = vfs_fsync_range(nf->nf_file, start, end, 0); switch (err2) { case 0: nfsd_copy_boot_verifier(verf, nn); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index b21b76e6b9a876..3cf5a8a13da505 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -73,8 +73,8 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, struct svc_fh *res, int createmode, u32 *verifier, bool *truncp, bool *created); -__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, - loff_t, unsigned long, __be32 *verf); +__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp, + u64 offset, u32 count, __be32 *verf); #endif /* CONFIG_NFSD_V3 */ #ifdef CONFIG_NFSD_V4 __be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, From ffd3e67f0dfbb9b6e4aebc2c6027005bad0466a9 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Fri, 4 Feb 2022 13:14:08 -0800 Subject: [PATCH 0361/1056] riscv/mm: Add XIP_FIXUP for riscv_pfn_base [ Upstream commit ca0cb9a60f6d86d4b2139c6f393a78f39edcd7cb ] This manifests as a crash early in boot on VexRiscv. Signed-off-by: Myrtle Shah [Palmer: split commit] Fixes: 44c922572952 ("RISC-V: enable XIP") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/mm/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 7f130ac3b9f9ad..c58a7c77989b0c 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -265,6 +265,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #ifdef CONFIG_XIP_KERNEL +#define riscv_pfn_base (*(unsigned long *)XIP_FIXUP(&riscv_pfn_base)) #define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir)) #define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte)) #define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir)) From 4c0bb583a4444cce224e8661090cbffc98e2fe07 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Tue, 22 Feb 2022 10:42:21 +0800 Subject: [PATCH 0362/1056] iio: accel: mma8452: use the correct logic to get mma8452_data [ Upstream commit c87b7b12f48db86ac9909894f4dc0107d7df6375 ] The original logic to get mma8452_data is wrong, the *dev point to the device belong to iio_dev. we can't use this dev to find the correct i2c_client. The original logic happen to work because it finally use dev->driver_data to get iio_dev. Here use the API to_i2c_client() is wrong and make reader confuse. To correct the logic, it should be like this struct mma8452_data *data = iio_priv(dev_get_drvdata(dev)); But after commit 8b7651f25962 ("iio: iio_device_alloc(): Remove unnecessary self drvdata"), the upper logic also can't work. When try to show the avialable scale in userspace, will meet kernel dump, kernel handle NULL pointer dereference. So use dev_to_iio_dev() to correct the logic. Dual fixes tags as the second reflects when the bug was exposed, whilst the first reflects when the original bug was introduced. Fixes: c3cdd6e48e35 ("iio: mma8452: refactor for seperating chip specific data") Fixes: 8b7651f25962 ("iio: iio_device_alloc(): Remove unnecessary self drvdata") Signed-off-by: Haibo Chen Reviewed-by: Martin Kepplinger Cc: Link: https://lore.kernel.org/r/1645497741-5402-1-git-send-email-haibo.chen@nxp.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/accel/mma8452.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 373b59557afe96..1f46a73aafeacd 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -380,8 +380,8 @@ static ssize_t mma8452_show_scale_avail(struct device *dev, struct device_attribute *attr, char *buf) { - struct mma8452_data *data = iio_priv(i2c_get_clientdata( - to_i2c_client(dev))); + struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct mma8452_data *data = iio_priv(indio_dev); return mma8452_show_int_plus_micros(buf, data->chip_info->mma_scales, ARRAY_SIZE(data->chip_info->mma_scales)); From e65d78b12fbc0f4392f9d97dda11e2157a36de42 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sun, 6 Mar 2022 22:57:48 +0100 Subject: [PATCH 0363/1056] batman-adv: Use netif_rx(). [ Upstream commit 94da81e2fc4285db373fe9a1eb012c2ee205b110 ] Since commit baebdf48c3600 ("net: dev: Makes sure netif_rx() can be invoked in any context.") the function netif_rx() can be used in preemptible/thread context as well as in interrupt context. Use netif_rx(). Cc: Antonio Quartulli Cc: Marek Lindner Cc: Simon Wunderlich Cc: Sven Eckelmann Cc: b.a.t.m.a.n@lists.open-mesh.org Signed-off-by: Sebastian Andrzej Siewior Acked-by: Sven Eckelmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/batman-adv/bridge_loop_avoidance.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 17687848daec56..11f6ef657d822d 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -443,7 +443,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES, skb->len + ETH_HLEN); - netif_rx_any_context(skb); + netif_rx(skb); out: batadv_hardif_put(primary_if); } From a1e69c36de1777cbe2f99cbf7a72070f2c14c6d4 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Mon, 28 Feb 2022 18:33:34 +0200 Subject: [PATCH 0364/1056] mtd: spi-nor: Skip erase logic when SPI_NOR_NO_ERASE is set [ Upstream commit 151c6b49d679872d6fc0b50e0ad96303091694a2 ] Even if SPI_NOR_NO_ERASE was set, one could still send erase opcodes to the flash. It is not recommended to send unsupported opcodes to flashes. Fix the logic and do not set mtd->_erase when SPI_NOR_NO_ERASE is specified. With this users will not be able to issue erase opcodes to flashes and instead they will recive an -ENOTSUPP error. Fixes: b199489d37b2 ("mtd: spi-nor: add the framework for SPI NOR") Signed-off-by: Tudor Ambarus Reviewed-by: Michael Walle Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220228163334.277730-1-tudor.ambarus@microchip.com Signed-off-by: Sasha Levin --- drivers/mtd/spi-nor/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 90f39aabc1ff8f..d97cdbc2b9de35 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -3148,7 +3148,6 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, mtd->writesize = nor->params->writesize; mtd->flags = MTD_CAP_NORFLASH; mtd->size = nor->params->size; - mtd->_erase = spi_nor_erase; mtd->_read = spi_nor_read; mtd->_suspend = spi_nor_suspend; mtd->_resume = spi_nor_resume; @@ -3178,6 +3177,8 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, if (info->flags & SPI_NOR_NO_ERASE) mtd->flags |= MTD_NO_ERASE; + else + mtd->_erase = spi_nor_erase; mtd->dev.parent = dev; nor->page_size = nor->params->page_size; From ff41804632e530e8f8971f45b23ca29d63edf243 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 5 Nov 2021 13:36:19 -0700 Subject: [PATCH 0365/1056] Compiler Attributes: add __alloc_size() for better bounds checking [ Upstream commit 86cffecdeaa278444870c8745ab166a65865dbf0 ] GCC and Clang can use the "alloc_size" attribute to better inform the results of __builtin_object_size() (for compile-time constant values). Clang can additionally use alloc_size to inform the results of __builtin_dynamic_object_size() (for run-time values). Because GCC sees the frequent use of struct_size() as an allocator size argument, and notices it can return SIZE_MAX (the overflow indication), it complains about these call sites overflowing (since SIZE_MAX is greater than the default -Walloc-size-larger-than=PTRDIFF_MAX). This isn't helpful since we already know a SIZE_MAX will be caught at run-time (this was an intentional design). To deal with this, we must disable this check as it is both a false positive and redundant. (Clang does not have this warning option.) Unfortunately, just checking the -Wno-alloc-size-larger-than is not sufficient to make the __alloc_size attribute behave correctly under older GCC versions. The attribute itself must be disabled in those situations too, as there appears to be no way to reliably silence the SIZE_MAX constant expression cases for GCC versions less than 9.1: In file included from ./include/linux/resource_ext.h:11, from ./include/linux/pci.h:40, from drivers/net/ethernet/intel/ixgbe/ixgbe.h:9, from drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c:4: In function 'kmalloc_node', inlined from 'ixgbe_alloc_q_vector' at ./include/linux/slab.h:743:9: ./include/linux/slab.h:618:9: error: argument 1 value '18446744073709551615' exceeds maximum object size 9223372036854775807 [-Werror=alloc-size-larger-than=] return __kmalloc_node(size, flags, node); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/slab.h: In function 'ixgbe_alloc_q_vector': ./include/linux/slab.h:455:7: note: in a call to allocation function '__kmalloc_node' declared here void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_slab_alignment __malloc; ^~~~~~~~~~~~~~ Specifically: '-Wno-alloc-size-larger-than' is not correctly handled by GCC < 9.1 https://godbolt.org/z/hqsfG7q84 (doesn't disable) https://godbolt.org/z/P9jdrPTYh (doesn't admit to not knowing about option) https://godbolt.org/z/465TPMWKb (only warns when other warnings appear) '-Walloc-size-larger-than=18446744073709551615' is not handled by GCC < 8.2 https://godbolt.org/z/73hh1EPxz (ignores numeric value) Since anything marked with __alloc_size would also qualify for marking with __malloc, just include __malloc along with it to avoid redundant markings. (Suggested by Linus Torvalds.) Finally, make sure checkpatch.pl doesn't get confused about finding the __alloc_size attribute on functions. (Thanks to Joe Perches.) Link: https://lkml.kernel.org/r/20210930222704.2631604-3-keescook@chromium.org Signed-off-by: Kees Cook Tested-by: Randy Dunlap Cc: Andy Whitcroft Cc: Christoph Lameter Cc: Daniel Micay Cc: David Rientjes Cc: Dennis Zhou Cc: Dwaipayan Ray Cc: Joe Perches Cc: Joonsoo Kim Cc: Lukas Bulwahn Cc: Pekka Enberg Cc: Tejun Heo Cc: Vlastimil Babka Cc: Alexandre Bounine Cc: Gustavo A. R. Silva Cc: Ira Weiny Cc: Jing Xiangfeng Cc: John Hubbard Cc: kernel test robot Cc: Matt Porter Cc: Miguel Ojeda Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Souptick Joarder Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- Makefile | 15 +++++++++++++++ include/linux/compiler-gcc.h | 8 ++++++++ include/linux/compiler_attributes.h | 10 ++++++++++ include/linux/compiler_types.h | 12 ++++++++++++ scripts/checkpatch.pl | 3 ++- 5 files changed, 47 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c7750d260a551a..397fb08d17f2b7 100644 --- a/Makefile +++ b/Makefile @@ -1011,6 +1011,21 @@ ifdef CONFIG_CC_IS_GCC KBUILD_CFLAGS += -Wno-maybe-uninitialized endif +ifdef CONFIG_CC_IS_GCC +# The allocators already balk at large sizes, so silence the compiler +# warnings for bounds checks involving those possible values. While +# -Wno-alloc-size-larger-than would normally be used here, earlier versions +# of gcc (<9.1) weirdly don't handle the option correctly when _other_ +# warnings are produced (?!). Using -Walloc-size-larger-than=SIZE_MAX +# doesn't work (as it is documented to), silently resolving to "0" prior to +# version 9.1 (and producing an error more recently). Numeric values larger +# than PTRDIFF_MAX also don't work prior to version 9.1, which are silently +# ignored, continuing to default to PTRDIFF_MAX. So, left with no other +# choice, we must perform a versioned check to disable this warning. +# https://lore.kernel.org/lkml/20210824115859.187f272f@canb.auug.org.au +KBUILD_CFLAGS += $(call cc-ifversion, -ge, 0901, -Wno-alloc-size-larger-than) +endif + # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index bd2b881c6b63a8..b9d5f9c373a09e 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -144,3 +144,11 @@ #else #define __diag_GCC_8(s) #endif + +/* + * Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size" + * attribute) do not work, and must be disabled. + */ +#if GCC_VERSION < 90100 +#undef __alloc_size__ +#endif diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index e6ec634039658e..3de06a8fae73b9 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -33,6 +33,15 @@ #define __aligned(x) __attribute__((__aligned__(x))) #define __aligned_largest __attribute__((__aligned__)) +/* + * Note: do not use this directly. Instead, use __alloc_size() since it is conditionally + * available and includes other attributes. + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alloc_005fsize-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#alloc-size + */ +#define __alloc_size__(x, ...) __attribute__((__alloc_size__(x, ## __VA_ARGS__))) + /* * Note: users of __always_inline currently do not write "inline" themselves, * which seems to be required by gcc to apply the attribute according @@ -153,6 +162,7 @@ /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-malloc-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#malloc */ #define __malloc __attribute__((__malloc__)) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index b6ff83a714ca98..4f2203c4a2574d 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -250,6 +250,18 @@ struct ftrace_likely_data { # define __cficanonical #endif +/* + * Any place that could be marked with the "alloc_size" attribute is also + * a place to be marked with the "malloc" attribute. Do this as part of the + * __alloc_size macro to avoid redundant attributes and to avoid missing a + * __malloc marking. + */ +#ifdef __alloc_size__ +# define __alloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__) __malloc +#else +# define __alloc_size(x, ...) __malloc +#endif + #ifndef asm_volatile_goto #define asm_volatile_goto(x...) asm goto(x) #endif diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index c27d2312cfc307..88cb294dc4472e 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -489,7 +489,8 @@ sub hash_show_words { ____cacheline_aligned| ____cacheline_aligned_in_smp| ____cacheline_internodealigned_in_smp| - __weak + __weak| + __alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) }x; our $Modifier; our $Inline = qr{inline|__always_inline|noinline|__inline|__inline__}; From c33904fd1ef49095488060f8e3ac807c6d70ca04 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 8 Mar 2022 04:47:22 -0500 Subject: [PATCH 0366/1056] mm: vmalloc: introduce array allocation functions [ Upstream commit a8749a35c39903120ec421ef2525acc8e0daa55c ] Linux has dozens of occurrences of vmalloc(array_size()) and vzalloc(array_size()). Allow to simplify the code by providing vmalloc_array and vcalloc, as well as the underscored variants that let the caller specify the GFP flags. Acked-by: Michal Hocko Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- include/linux/vmalloc.h | 5 +++++ mm/util.c | 50 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 4fe9e885bbfac2..5535be1012a284 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -159,6 +159,11 @@ void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller); void *vmalloc_no_huge(unsigned long size); +extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); +extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2); +extern void *__vcalloc(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); +extern void *vcalloc(size_t n, size_t size) __alloc_size(1, 2); + extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); diff --git a/mm/util.c b/mm/util.c index 3073de05c2bd42..ea04979f131e23 100644 --- a/mm/util.c +++ b/mm/util.c @@ -698,6 +698,56 @@ static inline void *__page_rmapping(struct page *page) return (void *)mapping; } +/** + * __vmalloc_array - allocate memory for a virtually contiguous array. + * @n: number of elements. + * @size: element size. + * @flags: the type of memory to allocate (see kmalloc). + */ +void *__vmalloc_array(size_t n, size_t size, gfp_t flags) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return NULL; + return __vmalloc(bytes, flags); +} +EXPORT_SYMBOL(__vmalloc_array); + +/** + * vmalloc_array - allocate memory for a virtually contiguous array. + * @n: number of elements. + * @size: element size. + */ +void *vmalloc_array(size_t n, size_t size) +{ + return __vmalloc_array(n, size, GFP_KERNEL); +} +EXPORT_SYMBOL(vmalloc_array); + +/** + * __vcalloc - allocate and zero memory for a virtually contiguous array. + * @n: number of elements. + * @size: element size. + * @flags: the type of memory to allocate (see kmalloc). + */ +void *__vcalloc(size_t n, size_t size, gfp_t flags) +{ + return __vmalloc_array(n, size, flags | __GFP_ZERO); +} +EXPORT_SYMBOL(__vcalloc); + +/** + * vcalloc - allocate and zero memory for a virtually contiguous array. + * @n: number of elements. + * @size: element size. + */ +void *vcalloc(size_t n, size_t size) +{ + return __vmalloc_array(n, size, GFP_KERNEL | __GFP_ZERO); +} +EXPORT_SYMBOL(vcalloc); + /* Neutral page->mapping pointer to address_space or anon_vma or other */ void *page_rmapping(struct page *page) { From 6784b694ecd82c1c396f82e012ef892eb409a8d7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 8 Mar 2022 04:49:37 -0500 Subject: [PATCH 0367/1056] KVM: use __vcalloc for very large allocations [ Upstream commit 37b2a6510a48ca361ced679f92682b7b7d7d0330 ] Allocations whose size is related to the memslot size can be arbitrarily large. Do not use kvzalloc/kvcalloc, as those are limited to "not crazy" sizes that fit in 32 bits. Cc: stable@vger.kernel.org Fixes: 7661809d493b ("mm: don't allow oversized kvmalloc() calls") Reviewed-by: David Hildenbrand Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/powerpc/kvm/book3s_hv_uvmem.c | 2 +- arch/x86/kvm/mmu/page_track.c | 4 ++-- arch/x86/kvm/x86.c | 4 ++-- virt/kvm/kvm_main.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 3fbe710ff83903..3d4ee75b0fb762 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -251,7 +251,7 @@ int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot) p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; - p->pfns = vzalloc(array_size(slot->npages, sizeof(*p->pfns))); + p->pfns = vcalloc(slot->npages, sizeof(*p->pfns)); if (!p->pfns) { kfree(p); return -ENOMEM; diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c index 21427e84a82ef6..630ae70bb6bd3f 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c @@ -36,8 +36,8 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { slot->arch.gfn_track[i] = - kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]), - GFP_KERNEL_ACCOUNT); + __vcalloc(npages, sizeof(*slot->arch.gfn_track[i]), + GFP_KERNEL_ACCOUNT); if (!slot->arch.gfn_track[i]) goto track_free; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 39aaa21e28f785..8974884ef2ad52 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11552,7 +11552,7 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot, if (slot->arch.rmap[i]) continue; - slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT); + slot->arch.rmap[i] = __vcalloc(lpages, sz, GFP_KERNEL_ACCOUNT); if (!slot->arch.rmap[i]) { memslot_rmap_free(slot); return -ENOMEM; @@ -11633,7 +11633,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm, lpages = __kvm_mmu_slot_lpages(slot, npages, level); - linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT); + linfo = __vcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT); if (!linfo) goto out_free; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index fefdf3a6dae353..99c5915698156a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1255,9 +1255,9 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) */ static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot) { - unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); + unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(memslot); - memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL_ACCOUNT); + memslot->dirty_bitmap = __vcalloc(2, dirty_bytes, GFP_KERNEL_ACCOUNT); if (!memslot->dirty_bitmap) return -ENOMEM; From 83772314e1e0a5232823651e44a77a33857eafba Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Thu, 3 Mar 2022 22:40:27 +0800 Subject: [PATCH 0368/1056] btrfs: don't access possibly stale fs_info data in device_list_add [ Upstream commit 79c9234ba596e903907de20573fd4bcc85315b06 ] Syzbot reported a possible use-after-free in printing information in device_list_add. Very similar with the bug fixed by commit 0697d9a61099 ("btrfs: don't access possibly stale fs_info data for printing duplicate device"), but this time the use occurs in btrfs_info_in_rcu. Call Trace: kasan_report.cold+0x83/0xdf mm/kasan/report.c:459 btrfs_printk+0x395/0x425 fs/btrfs/super.c:244 device_list_add.cold+0xd7/0x2ed fs/btrfs/volumes.c:957 btrfs_scan_one_device+0x4c7/0x5c0 fs/btrfs/volumes.c:1387 btrfs_control_ioctl+0x12a/0x2d0 fs/btrfs/super.c:2409 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] __se_sys_ioctl fs/ioctl.c:860 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Fix this by modifying device->fs_info to NULL too. Reported-and-tested-by: syzbot+82650a4e0ed38f218363@syzkaller.appspotmail.com CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Dongliang Mu Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cec54c6e1cdd6d..89ce0b449c22a1 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -955,6 +955,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, /* * We are going to replace the device path for a given devid, * make sure it's the same device if the device is mounted + * + * NOTE: the device->fs_info may not be reliable here so pass + * in a NULL to message helpers instead. This avoids a possible + * use-after-free when the fs_info and fs_info->sb are already + * torn down. */ if (device->bdev) { int error; @@ -968,12 +973,6 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (device->bdev->bd_dev != path_dev) { mutex_unlock(&fs_devices->device_list_mutex); - /* - * device->fs_info may not be reliable here, so - * pass in a NULL instead. This avoids a - * possible use-after-free when the fs_info and - * fs_info->sb are already torn down. - */ btrfs_warn_in_rcu(NULL, "duplicate device %s devid %llu generation %llu scanned by %s (%d)", path, devid, found_transid, @@ -981,7 +980,7 @@ static noinline struct btrfs_device *device_list_add(const char *path, task_pid_nr(current)); return ERR_PTR(-EEXIST); } - btrfs_info_in_rcu(device->fs_info, + btrfs_info_in_rcu(NULL, "devid %llu device path %s changed to %s scanned by %s (%d)", devid, rcu_str_deref(device->name), path, current->comm, From 31c60d15ccd1b810d03c05e81d3c21bf242e319e Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 1 Mar 2022 15:33:40 +0100 Subject: [PATCH 0369/1056] KVM: s390x: fix SCK locking [ Upstream commit c0573ba5c5a2244dc02060b1f374d4593c1d20b7 ] When handling the SCK instruction, the kvm lock is taken, even though the vcpu lock is already being held. The normal locking order is kvm lock first and then vcpu lock. This is can (and in some circumstances does) lead to deadlocks. The function kvm_s390_set_tod_clock is called both by the SCK handler and by some IOCTLs to set the clock. The IOCTLs will not hold the vcpu lock, so they can safely take the kvm lock. The SCK handler holds the vcpu lock, but will also somehow need to acquire the kvm lock without relinquishing the vcpu lock. The solution is to factor out the code to set the clock, and provide two wrappers. One is called like the original function and does the locking, the other is called kvm_s390_try_set_tod_clock and uses trylock to try to acquire the kvm lock. This new wrapper is then used in the SCK handler. If locking fails, -EAGAIN is returned, which is eventually propagated to userspace, thus also freeing the vcpu lock and allowing for forward progress. This is not the most efficient or elegant way to solve this issue, but the SCK instruction is deprecated and its performance is not critical. The goal of this patch is just to provide a simple but correct way to fix the bug. Fixes: 6a3f95a6b04c ("KVM: s390: Intercept SCK instruction") Signed-off-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger Reviewed-by: Janis Schoetterl-Glausch Link: https://lore.kernel.org/r/20220301143340.111129-1-imbrenda@linux.ibm.com Cc: stable@vger.kernel.org Signed-off-by: Christian Borntraeger Signed-off-by: Sasha Levin --- arch/s390/kvm/kvm-s390.c | 19 ++++++++++++++++--- arch/s390/kvm/kvm-s390.h | 4 ++-- arch/s390/kvm/priv.c | 15 ++++++++++++++- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 402597f9d05052..b456aa196c04fa 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3913,14 +3913,12 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) return 0; } -void kvm_s390_set_tod_clock(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod) +static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) { struct kvm_vcpu *vcpu; union tod_clock clk; int i; - mutex_lock(&kvm->lock); preempt_disable(); store_tod_clock_ext(&clk); @@ -3941,7 +3939,22 @@ void kvm_s390_set_tod_clock(struct kvm *kvm, kvm_s390_vcpu_unblock_all(kvm); preempt_enable(); +} + +void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) +{ + mutex_lock(&kvm->lock); + __kvm_s390_set_tod_clock(kvm, gtod); + mutex_unlock(&kvm->lock); +} + +int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) +{ + if (!mutex_trylock(&kvm->lock)) + return 0; + __kvm_s390_set_tod_clock(kvm, gtod); mutex_unlock(&kvm->lock); + return 1; } /** diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 1539dd981104fe..f8803bf0ff1700 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -326,8 +326,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod); +void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); +int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 417154b314a647..6a765fe22eafc2 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -102,7 +102,20 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) return kvm_s390_inject_prog_cond(vcpu, rc); VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod); - kvm_s390_set_tod_clock(vcpu->kvm, >od); + /* + * To set the TOD clock the kvm lock must be taken, but the vcpu lock + * is already held in handle_set_clock. The usual lock order is the + * opposite. As SCK is deprecated and should not be used in several + * cases, for example when the multiple epoch facility or TOD clock + * steering facility is installed (see Principles of Operation), a + * slow path can be used. If the lock can not be taken via try_lock, + * the instruction will be retried via -EAGAIN at a later point in + * time. + */ + if (!kvm_s390_try_set_tod_clock(vcpu->kvm, >od)) { + kvm_s390_retry_instr(vcpu); + return -EAGAIN; + } kvm_s390_set_psw_cc(vcpu, 0); return 0; From b342feb491415ee5a35d7edcd759e9d021acc544 Mon Sep 17 00:00:00 2001 From: Arun Easi Date: Thu, 10 Mar 2022 01:25:54 -0800 Subject: [PATCH 0370/1056] scsi: qla2xxx: Fix loss of NVMe namespaces after driver reload test [ Upstream commit db212f2eb3fb7f546366777e93c8f54614d39269 ] Driver registration of localport can race when it happens at the remote port discovery time. Fix this by calling the registration under a mutex. Link: https://lore.kernel.org/r/20220310092604.22950-4-njavali@marvell.com Fixes: e84067d74301 ("scsi: qla2xxx: Add FC-NVMe F/W initialization and transport registration") Cc: stable@vger.kernel.org Reported-by: Marco Patalano Tested-by: Marco Patalano Reviewed-by: Himanshu Madhani Signed-off-by: Arun Easi Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_nvme.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 42b29f4fd93717..1bf3ab10846aa6 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -775,7 +775,6 @@ int qla_nvme_register_hba(struct scsi_qla_host *vha) ha = vha->hw; tmpl = &qla_nvme_fc_transport; - WARN_ON(vha->nvme_local_port); qla_nvme_fc_transport.max_hw_queues = min((uint8_t)(qla_nvme_fc_transport.max_hw_queues), @@ -786,13 +785,25 @@ int qla_nvme_register_hba(struct scsi_qla_host *vha) pinfo.port_role = FC_PORT_ROLE_NVME_INITIATOR; pinfo.port_id = vha->d_id.b24; - ql_log(ql_log_info, vha, 0xffff, - "register_localport: host-traddr=nn-0x%llx:pn-0x%llx on portID:%x\n", - pinfo.node_name, pinfo.port_name, pinfo.port_id); - qla_nvme_fc_transport.dma_boundary = vha->host->dma_boundary; - - ret = nvme_fc_register_localport(&pinfo, tmpl, - get_device(&ha->pdev->dev), &vha->nvme_local_port); + mutex_lock(&ha->vport_lock); + /* + * Check again for nvme_local_port to see if any other thread raced + * with this one and finished registration. + */ + if (!vha->nvme_local_port) { + ql_log(ql_log_info, vha, 0xffff, + "register_localport: host-traddr=nn-0x%llx:pn-0x%llx on portID:%x\n", + pinfo.node_name, pinfo.port_name, pinfo.port_id); + qla_nvme_fc_transport.dma_boundary = vha->host->dma_boundary; + + ret = nvme_fc_register_localport(&pinfo, tmpl, + get_device(&ha->pdev->dev), + &vha->nvme_local_port); + mutex_unlock(&ha->vport_lock); + } else { + mutex_unlock(&ha->vport_lock); + return 0; + } if (ret) { ql_log(ql_log_warn, vha, 0xffff, "register_localport failed: ret=%x\n", ret); From 0a80e66a10af2b74c68739927201f4d5aebc467f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 23 Aug 2021 15:29:12 +0000 Subject: [PATCH 0371/1056] powerpc/32: Don't use lmw/stmw for saving/restoring non volatile regs [ Upstream commit a85c728cb5e12216c19ae5878980c2cbbbf8616d ] Instructions lmw/stmw are interesting for functions that are rarely used and not in the cache, because only one instruction is to be copied into the instruction cache instead of 19. However those instruction are less performant than 19x raw lwz/stw as they require synchronisation plus one additional cycle. SAVE_NVGPRS / REST_NVGPRS are used in only a few places which are mostly in interrupts entries/exits and in task switch so they are likely already in the cache. Using standard lwz improves null_syscall selftest by: - 10 cycles on mpc832x. - 2 cycles on mpc8xx. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/316c543b8906712c108985c8463eec09c8db577b.1629732542.git.christophe.leroy@csgroup.eu Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/ppc_asm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 1c538a9a11e093..7be24048b8d1a4 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -28,8 +28,8 @@ #else #define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base) #define REST_GPR(n, base) lwz n,GPR0+4*(n)(base) -#define SAVE_NVGPRS(base) stmw 13, GPR0+4*13(base) -#define REST_NVGPRS(base) lmw 13, GPR0+4*13(base) +#define SAVE_NVGPRS(base) SAVE_GPR(13, base); SAVE_8GPRS(14, base); SAVE_10GPRS(22, base) +#define REST_NVGPRS(base) REST_GPR(13, base); REST_8GPRS(14, base); REST_10GPRS(22, base) #endif #define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) From ed8a5d63a0da5ea9ed910918be134669ca39454b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 22 Oct 2021 16:13:22 +1000 Subject: [PATCH 0372/1056] powerpc: flexible GPR range save/restore macros [ Upstream commit aebd1fb45c622e9a2b06fb70665d084d3a8d6c78 ] Introduce macros that operate on a (start, end) range of GPRs, which reduces lines of code and need to do mental arithmetic while reading the code. Signed-off-by: Nicholas Piggin Reviewed-by: Segher Boessenkool Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211022061322.2671178-1-npiggin@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/boot/crt0.S | 31 +++++++------ arch/powerpc/crypto/md5-asm.S | 10 ++--- arch/powerpc/crypto/sha1-powerpc-asm.S | 6 +-- arch/powerpc/include/asm/ppc_asm.h | 43 ++++++++++++------- arch/powerpc/kernel/entry_32.S | 23 ++++------ arch/powerpc/kernel/exceptions-64e.S | 14 ++---- arch/powerpc/kernel/exceptions-64s.S | 6 +-- arch/powerpc/kernel/head_32.h | 3 +- arch/powerpc/kernel/head_booke.h | 3 +- arch/powerpc/kernel/interrupt_64.S | 34 ++++++--------- arch/powerpc/kernel/optprobes_head.S | 4 +- arch/powerpc/kernel/tm.S | 15 ++----- .../powerpc/kernel/trace/ftrace_64_mprofile.S | 15 +++---- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 5 +-- .../lib/test_emulate_step_exec_instr.S | 8 ++-- 15 files changed, 94 insertions(+), 126 deletions(-) diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 1d83966f5ef640..e8f10a59965933 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -226,16 +226,19 @@ p_base: mflr r10 /* r10 now points to runtime addr of p_base */ #ifdef __powerpc64__ #define PROM_FRAME_SIZE 512 -#define SAVE_GPR(n, base) std n,8*(n)(base) -#define REST_GPR(n, base) ld n,8*(n)(base) -#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) -#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base) -#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base) -#define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base) -#define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base) -#define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base) -#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base) -#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base) + +.macro OP_REGS op, width, start, end, base, offset + .Lreg=\start + .rept (\end - \start + 1) + \op .Lreg,\offset+\width*.Lreg(\base) + .Lreg=.Lreg+1 + .endr +.endm + +#define SAVE_GPRS(start, end, base) OP_REGS std, 8, start, end, base, 0 +#define REST_GPRS(start, end, base) OP_REGS ld, 8, start, end, base, 0 +#define SAVE_GPR(n, base) SAVE_GPRS(n, n, base) +#define REST_GPR(n, base) REST_GPRS(n, n, base) /* prom handles the jump into and return from firmware. The prom args pointer is loaded in r3. */ @@ -246,9 +249,7 @@ prom: stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */ SAVE_GPR(2, r1) - SAVE_GPR(13, r1) - SAVE_8GPRS(14, r1) - SAVE_10GPRS(22, r1) + SAVE_GPRS(13, 31, r1) mfcr r10 std r10,8*32(r1) mfmsr r10 @@ -283,9 +284,7 @@ prom: /* Restore other registers */ REST_GPR(2, r1) - REST_GPR(13, r1) - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) + REST_GPRS(13, 31, r1) ld r10,8*32(r1) mtcr r10 diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S index 948d100a293436..fa6bc440cf4acf 100644 --- a/arch/powerpc/crypto/md5-asm.S +++ b/arch/powerpc/crypto/md5-asm.S @@ -38,15 +38,11 @@ #define INITIALIZE \ PPC_STLU r1,-INT_FRAME_SIZE(r1); \ - SAVE_8GPRS(14, r1); /* push registers onto stack */ \ - SAVE_4GPRS(22, r1); \ - SAVE_GPR(26, r1) + SAVE_GPRS(14, 26, r1) /* push registers onto stack */ #define FINALIZE \ - REST_8GPRS(14, r1); /* pop registers from stack */ \ - REST_4GPRS(22, r1); \ - REST_GPR(26, r1); \ - addi r1,r1,INT_FRAME_SIZE; + REST_GPRS(14, 26, r1); /* pop registers from stack */ \ + addi r1,r1,INT_FRAME_SIZE #ifdef __BIG_ENDIAN__ #define LOAD_DATA(reg, off) \ diff --git a/arch/powerpc/crypto/sha1-powerpc-asm.S b/arch/powerpc/crypto/sha1-powerpc-asm.S index 23e248beff7166..f0d5ed557ab14d 100644 --- a/arch/powerpc/crypto/sha1-powerpc-asm.S +++ b/arch/powerpc/crypto/sha1-powerpc-asm.S @@ -125,8 +125,7 @@ _GLOBAL(powerpc_sha_transform) PPC_STLU r1,-INT_FRAME_SIZE(r1) - SAVE_8GPRS(14, r1) - SAVE_10GPRS(22, r1) + SAVE_GPRS(14, 31, r1) /* Load up A - E */ lwz RA(0),0(r3) /* A */ @@ -184,7 +183,6 @@ _GLOBAL(powerpc_sha_transform) stw RD(0),12(r3) stw RE(0),16(r3) - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) + REST_GPRS(14, 31, r1) addi r1,r1,INT_FRAME_SIZE blr diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 7be24048b8d1a4..f21e6bde17a1e1 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -16,30 +16,41 @@ #define SZL (BITS_PER_LONG/8) +/* + * This expands to a sequence of operations with reg incrementing from + * start to end inclusive, of this form: + * + * op reg, (offset + (width * reg))(base) + * + * Note that offset is not the offset of the first operation unless start + * is zero (or width is zero). + */ +.macro OP_REGS op, width, start, end, base, offset + .Lreg=\start + .rept (\end - \start + 1) + \op .Lreg, \offset + \width * .Lreg(\base) + .Lreg=.Lreg+1 + .endr +.endm + /* * Macros for storing registers into and loading registers from * exception frames. */ #ifdef __powerpc64__ -#define SAVE_GPR(n, base) std n,GPR0+8*(n)(base) -#define REST_GPR(n, base) ld n,GPR0+8*(n)(base) -#define SAVE_NVGPRS(base) SAVE_8GPRS(14, base); SAVE_10GPRS(22, base) -#define REST_NVGPRS(base) REST_8GPRS(14, base); REST_10GPRS(22, base) +#define SAVE_GPRS(start, end, base) OP_REGS std, 8, start, end, base, GPR0 +#define REST_GPRS(start, end, base) OP_REGS ld, 8, start, end, base, GPR0 +#define SAVE_NVGPRS(base) SAVE_GPRS(14, 31, base) +#define REST_NVGPRS(base) REST_GPRS(14, 31, base) #else -#define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base) -#define REST_GPR(n, base) lwz n,GPR0+4*(n)(base) -#define SAVE_NVGPRS(base) SAVE_GPR(13, base); SAVE_8GPRS(14, base); SAVE_10GPRS(22, base) -#define REST_NVGPRS(base) REST_GPR(13, base); REST_8GPRS(14, base); REST_10GPRS(22, base) +#define SAVE_GPRS(start, end, base) OP_REGS stw, 4, start, end, base, GPR0 +#define REST_GPRS(start, end, base) OP_REGS lwz, 4, start, end, base, GPR0 +#define SAVE_NVGPRS(base) SAVE_GPRS(13, 31, base) +#define REST_NVGPRS(base) REST_GPRS(13, 31, base) #endif -#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) -#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base) -#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base) -#define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base) -#define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base) -#define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base) -#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base) -#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base) +#define SAVE_GPR(n, base) SAVE_GPRS(n, n, base) +#define REST_GPR(n, base) REST_GPRS(n, n, base) #define SAVE_FPR(n, base) stfd n,8*TS_FPRWIDTH*(n)(base) #define SAVE_2FPRS(n, base) SAVE_FPR(n, base); SAVE_FPR(n+1, base) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 61fdd53cdd9af1..c62dd981596538 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -90,8 +90,7 @@ transfer_to_syscall: stw r12,8(r1) stw r2,_TRAP(r1) SAVE_GPR(0, r1) - SAVE_4GPRS(3, r1) - SAVE_2GPRS(7, r1) + SAVE_GPRS(3, 8, r1) addi r2,r10,-THREAD SAVE_NVGPRS(r1) @@ -139,7 +138,7 @@ syscall_exit_finish: mtxer r5 lwz r0,GPR0(r1) lwz r3,GPR3(r1) - REST_8GPRS(4,r1) + REST_GPRS(4, 11, r1) lwz r12,GPR12(r1) b 1b @@ -232,9 +231,9 @@ fast_exception_return: beq 3f /* if not, we've got problems */ #endif -2: REST_4GPRS(3, r11) +2: REST_GPRS(3, 6, r11) lwz r10,_CCR(r11) - REST_2GPRS(1, r11) + REST_GPRS(1, 2, r11) mtcr r10 lwz r10,_LINK(r11) mtlr r10 @@ -298,16 +297,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) * the reliable stack unwinder later on. Clear it. */ stw r0,8(r1) - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) + REST_GPRS(7, 12, r1) mtcr r3 mtlr r4 mtctr r5 mtspr SPRN_XER,r6 - REST_4GPRS(2, r1) - REST_GPR(6, r1) + REST_GPRS(2, 6, r1) REST_GPR(0, r1) REST_GPR(1, r1) rfi @@ -341,8 +338,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) lwz r6,_CCR(r1) li r0,0 - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) + REST_GPRS(7, 12, r1) mtlr r3 mtctr r4 @@ -354,7 +350,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) */ stw r0,8(r1) - REST_4GPRS(2, r1) + REST_GPRS(2, 5, r1) bne- cr1,1f /* emulate stack store */ mtcr r6 @@ -430,8 +426,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) bne interrupt_return; \ lwz r0,GPR0(r1); \ lwz r2,GPR2(r1); \ - REST_4GPRS(3, r1); \ - REST_2GPRS(7, r1); \ + REST_GPRS(3, 8, r1); \ lwz r10,_XER(r1); \ lwz r11,_CTR(r1); \ mtspr SPRN_XER,r10; \ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 711c66b76df1a3..67dc4e3179a02a 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -198,8 +198,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) stdcx. r0,0,r1 /* to clear the reservation */ - REST_4GPRS(2, r1) - REST_4GPRS(6, r1) + REST_GPRS(2, 9, r1) ld r10,_CTR(r1) ld r11,_XER(r1) @@ -375,9 +374,7 @@ ret_from_mc_except: exc_##n##_common: \ std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ - SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ - SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ - std r9,GPR9(r1); /* save r9 in stackframe */ \ + SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \ std r10,_NIP(r1); /* save SRR0 to stackframe */ \ std r11,_MSR(r1); /* save SRR1 to stackframe */ \ beq 2f; /* if from kernel mode */ \ @@ -1061,9 +1058,7 @@ bad_stack_book3e: std r11,_ESR(r1) std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ - SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ - SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ - std r9,GPR9(r1); /* save r9 in stackframe */ \ + SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \ ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \ ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \ mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \ @@ -1077,8 +1072,7 @@ bad_stack_book3e: std r10,_LINK(r1) std r11,_CTR(r1) std r12,_XER(r1) - SAVE_10GPRS(14,r1) - SAVE_8GPRS(24,r1) + SAVE_GPRS(14, 31, r1) lhz r12,PACA_TRAP_SAVE(r13) std r12,_TRAP(r1) addi r11,r1,INT_FRAME_SIZE diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index eaf1f72131a18f..277eccf0f08688 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -574,8 +574,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r10,IAREA+EX_CTR(r13) std r10,_CTR(r1) std r2,GPR2(r1) /* save r2 in stackframe */ - SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */ - SAVE_2GPRS(7, r1) /* save r7, r8 in stackframe */ + SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe */ mflr r9 /* Get LR, later save to stack */ ld r2,PACATOC(r13) /* get kernel TOC into r2 */ std r9,_LINK(r1) @@ -693,8 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) mtlr r9 ld r9,_CCR(r1) mtcr r9 - REST_8GPRS(2, r1) - REST_4GPRS(10, r1) + REST_GPRS(2, 13, r1) REST_GPR(0, r1) /* restore original r1. */ ld r1,GPR1(r1) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 349c4a820231bd..261c79bdbe53fb 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -115,8 +115,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) stw r10,8(r1) li r10, \trapno stw r10,_TRAP(r1) - SAVE_4GPRS(3, r1) - SAVE_2GPRS(7, r1) + SAVE_GPRS(3, 8, r1) SAVE_NVGPRS(r1) stw r2,GPR2(r1) stw r12,_NIP(r1) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index ef8d1b1c234e7a..bb6d5d0fc4ac8e 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -87,8 +87,7 @@ END_BTB_FLUSH_SECTION stw r10, 8(r1) li r10, \trapno stw r10,_TRAP(r1) - SAVE_4GPRS(3, r1) - SAVE_2GPRS(7, r1) + SAVE_GPRS(3, 8, r1) SAVE_NVGPRS(r1) stw r2,GPR2(r1) stw r12,_NIP(r1) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index 4c6d1a8dcefed6..ff8c8c03f41ac4 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -166,10 +166,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * The value of AMR only matters while we're in the kernel. */ mtcr r2 - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r13,GPR13(r1) - ld r1,GPR1(r1) + REST_GPRS(2, 3, r1) + REST_GPR(13, r1) + REST_GPR(1, r1) RFSCV_TO_USER b . /* prevent speculative execution */ @@ -187,9 +186,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mtctr r3 mtlr r4 mtspr SPRN_XER,r5 - REST_10GPRS(2, r1) - REST_2GPRS(12, r1) - ld r1,GPR1(r1) + REST_GPRS(2, 13, r1) + REST_GPR(1, r1) RFI_TO_USER .Lsyscall_vectored_\name\()_rst_end: @@ -378,10 +376,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * The value of AMR only matters while we're in the kernel. */ mtcr r2 - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r13,GPR13(r1) - ld r1,GPR1(r1) + REST_GPRS(2, 3, r1) + REST_GPR(13, r1) + REST_GPR(1, r1) RFI_TO_USER b . /* prevent speculative execution */ @@ -392,8 +389,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mtctr r3 mtspr SPRN_XER,r4 ld r0,GPR0(r1) - REST_8GPRS(4, r1) - ld r12,GPR12(r1) + REST_GPRS(4, 12, r1) b .Lsyscall_restore_regs_cont .Lsyscall_rst_end: @@ -522,17 +518,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) ld r6,_XER(r1) li r0,0 - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) - REST_GPR(13, r1) + REST_GPRS(7, 13, r1) mtcr r3 mtlr r4 mtctr r5 mtspr SPRN_XER,r6 - REST_4GPRS(2, r1) - REST_GPR(6, r1) + REST_GPRS(2, 6, r1) REST_GPR(0, r1) REST_GPR(1, r1) .ifc \srr,srr @@ -629,8 +622,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) ld r6,_CCR(r1) li r0,0 - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) + REST_GPRS(7, 12, r1) mtlr r3 mtctr r4 @@ -642,7 +634,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) */ std r0,STACK_FRAME_OVERHEAD-16(r1) - REST_4GPRS(2, r1) + REST_GPRS(2, 5, r1) bne- cr1,1f /* emulate stack store */ mtcr r6 diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index 19ea3312403ca3..5c7f0b4b784b26 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -10,8 +10,8 @@ #include #ifdef CONFIG_PPC64 -#define SAVE_30GPRS(base) SAVE_10GPRS(2,base); SAVE_10GPRS(12,base); SAVE_10GPRS(22,base) -#define REST_30GPRS(base) REST_10GPRS(2,base); REST_10GPRS(12,base); REST_10GPRS(22,base) +#define SAVE_30GPRS(base) SAVE_GPRS(2, 31, base) +#define REST_30GPRS(base) REST_GPRS(2, 31, base) #define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop; nop; nop #else #define SAVE_30GPRS(base) stmw r2, GPR2(base) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 2b91f233b05d58..3beecc32940bc2 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -226,11 +226,8 @@ _GLOBAL(tm_reclaim) /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */ SAVE_GPR(0, r7) /* user r0 */ - SAVE_GPR(2, r7) /* user r2 */ - SAVE_4GPRS(3, r7) /* user r3-r6 */ - SAVE_GPR(8, r7) /* user r8 */ - SAVE_GPR(9, r7) /* user r9 */ - SAVE_GPR(10, r7) /* user r10 */ + SAVE_GPRS(2, 6, r7) /* user r2-r6 */ + SAVE_GPRS(8, 10, r7) /* user r8-r10 */ ld r3, GPR1(r1) /* user r1 */ ld r4, GPR7(r1) /* user r7 */ ld r5, GPR11(r1) /* user r11 */ @@ -445,12 +442,8 @@ restore_gprs: ld r6, THREAD_TM_PPR(r3) REST_GPR(0, r7) /* GPR0 */ - REST_2GPRS(2, r7) /* GPR2-3 */ - REST_GPR(4, r7) /* GPR4 */ - REST_4GPRS(8, r7) /* GPR8-11 */ - REST_2GPRS(12, r7) /* GPR12-13 */ - - REST_NVGPRS(r7) /* GPR14-31 */ + REST_GPRS(2, 4, r7) /* GPR2-4 */ + REST_GPRS(8, 31, r7) /* GPR8-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index f9fd5f743eba34..d636fc755f608e 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -41,15 +41,14 @@ _GLOBAL(ftrace_regs_caller) /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) - SAVE_10GPRS(2, r1) + SAVE_GPRS(2, 11, r1) /* Ok to continue? */ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 beq ftrace_no_trace - SAVE_10GPRS(12, r1) - SAVE_10GPRS(22, r1) + SAVE_GPRS(12, 31, r1) /* Save previous stack pointer (r1) */ addi r8, r1, SWITCH_FRAME_SIZE @@ -108,10 +107,8 @@ ftrace_regs_call: #endif /* Restore gprs */ - REST_GPR(0,r1) - REST_10GPRS(2,r1) - REST_10GPRS(12,r1) - REST_10GPRS(22,r1) + REST_GPR(0, r1) + REST_GPRS(2, 31, r1) /* Restore possibly modified LR */ ld r0, _LINK(r1) @@ -157,7 +154,7 @@ _GLOBAL(ftrace_caller) stdu r1, -SWITCH_FRAME_SIZE(r1) /* Save all gprs to pt_regs */ - SAVE_8GPRS(3, r1) + SAVE_GPRS(3, 10, r1) lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 @@ -194,7 +191,7 @@ ftrace_call: mtctr r3 /* Restore gprs */ - REST_8GPRS(3,r1) + REST_GPRS(3, 10, r1) /* Restore callee's TOC */ ld r2, 24(r1) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 32a4b4d412b92b..81fc1e0ebe9a84 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2711,8 +2711,7 @@ kvmppc_bad_host_intr: std r0, GPR0(r1) std r9, GPR1(r1) std r2, GPR2(r1) - SAVE_4GPRS(3, r1) - SAVE_2GPRS(7, r1) + SAVE_GPRS(3, 8, r1) srdi r0, r12, 32 clrldi r12, r12, 32 std r0, _CCR(r1) @@ -2735,7 +2734,7 @@ kvmppc_bad_host_intr: ld r9, HSTATE_SCRATCH2(r13) ld r12, HSTATE_SCRATCH0(r13) GET_SCRATCH0(r0) - SAVE_4GPRS(9, r1) + SAVE_GPRS(9, 12, r1) std r0, GPR13(r1) SAVE_NVGPRS(r1) ld r5, HSTATE_CFAR(r13) diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S index 9ef941d958d807..5473f9d03df3a0 100644 --- a/arch/powerpc/lib/test_emulate_step_exec_instr.S +++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S @@ -37,7 +37,7 @@ _GLOBAL(exec_instr) * The stack pointer (GPR1) and the thread pointer (GPR13) are not * saved as these should not be modified anyway. */ - SAVE_2GPRS(2, r1) + SAVE_GPRS(2, 3, r1) SAVE_NVGPRS(r1) /* @@ -75,8 +75,7 @@ _GLOBAL(exec_instr) /* Load GPRs from pt_regs */ REST_GPR(0, r31) - REST_10GPRS(2, r31) - REST_GPR(12, r31) + REST_GPRS(2, 12, r31) REST_NVGPRS(r31) /* Placeholder for the test instruction */ @@ -99,8 +98,7 @@ _GLOBAL(exec_instr) subi r3, r3, GPR0 SAVE_GPR(0, r3) SAVE_GPR(2, r3) - SAVE_8GPRS(4, r3) - SAVE_GPR(12, r3) + SAVE_GPRS(4, 12, r3) SAVE_NVGPRS(r3) /* Save resulting LR to pt_regs */ From 5dce84f475d13b773a1a4c823581cab25044d86a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 11 Mar 2022 12:47:33 +1000 Subject: [PATCH 0373/1056] powerpc/tm: Fix more userspace r13 corruption [ Upstream commit 9d71165d3934e607070c4e48458c0cf161b1baea ] Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a problem in treclaim where a SLB miss can occur on the thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13 value, clobbering it with the kernel r13 and ultimately resulting in kernel r13 being stored in ckpt_regs. There is an equivalent problem in trechkpt where the user r13 value is loaded into r13 from chkpt_regs to be recheckpointed, but a SLB miss could occur on ckpt_regs accesses after that, which will result in r13 being clobbered with a kernel value and that will get recheckpointed and then restored to user registers. The same memory page is accessed right before this critical window where a SLB miss could cause corruption, so hitting the bug requires the SLB entry be removed within a small window of instructions, which is possible if a SLB related MCE hits there. PAPR also permits the hypervisor to discard this SLB entry (because slb_shadow->persistent is only set to SLB_NUM_BOLTED) although it's not known whether any implementations would do this (KVM does not). So this is an extremely unlikely bug, only found by inspection. Fix this by also storing user r13 in a temporary location on the kernel stack and don't change the r13 register from kernel r13 until the RI=0 critical section that does not fault. The SCRATCH0 change is not strictly part of the fix, it's only used in the RI=0 section so it does not have the same problem as the previous SCRATCH0 bug. Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching") Cc: stable@vger.kernel.org # v3.9+ Signed-off-by: Nicholas Piggin Acked-by: Michael Neuling Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/tm.S | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 3beecc32940bc2..5a0f023a26e90f 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -443,7 +443,8 @@ restore_gprs: REST_GPR(0, r7) /* GPR0 */ REST_GPRS(2, 4, r7) /* GPR2-4 */ - REST_GPRS(8, 31, r7) /* GPR8-31 */ + REST_GPRS(8, 12, r7) /* GPR8-12 */ + REST_GPRS(14, 31, r7) /* GPR14-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 @@ -479,18 +480,24 @@ restore_gprs: REST_GPR(6, r7) /* - * Store r1 and r5 on the stack so that we can access them after we - * clear MSR RI. + * Store user r1 and r5 and r13 on the stack (in the unused save + * areas / compiler reserved areas), so that we can access them after + * we clear MSR RI. */ REST_GPR(5, r7) std r5, -8(r1) - ld r5, GPR1(r7) + ld r5, GPR13(r7) std r5, -16(r1) + ld r5, GPR1(r7) + std r5, -24(r1) REST_GPR(7, r7) - /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ + /* Stash the stack pointer away for use after recheckpoint */ + std r1, PACAR1(r13) + + /* Clear MSR RI since we are about to clobber r13. EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -501,9 +508,9 @@ restore_gprs: * until we turn MSR RI back on. */ - SET_SCRATCH0(r1) ld r5, -8(r1) - ld r1, -16(r1) + ld r13, -16(r1) + ld r1, -24(r1) /* Commit register state as checkpointed state: */ TRECHKPT @@ -519,9 +526,9 @@ restore_gprs: */ GET_PACA(r13) - GET_SCRATCH0(r1) + ld r1, PACAR1(r13) - /* R1 is restored, so we are recoverable again. EE is still off */ + /* R13, R1 is restored, so we are recoverable again. EE is still off */ li r4, MSR_RI mtmsrd r4, 1 From 170a08ad3d1a184f3f84e5d56031541740c7f874 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 8 Mar 2022 19:00:42 +0800 Subject: [PATCH 0374/1056] serial: sc16is7xx: Clear RS485 bits in the shutdown [ Upstream commit 927728a34f11b5a27f4610bdb7068317d6fdc72a ] We tested RS485 function on an EVB which has SC16IS752, after finishing the test, we started the RS232 function test, but found the RTS is still working in the RS485 mode. That is because both startup and shutdown call port_update() to set the EFCR_REG, this will not clear the RS485 bits once the bits are set in the reconf_rs485(). To fix it, clear the RS485 bits in shutdown. Cc: Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20220308110042.108451-1-hui.wang@canonical.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/sc16is7xx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 0ab788058fa2af..e98aa7b97cc5eb 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1055,10 +1055,12 @@ static void sc16is7xx_shutdown(struct uart_port *port) /* Disable all interrupts */ sc16is7xx_port_write(port, SC16IS7XX_IER_REG, 0); - /* Disable TX/RX */ + /* Disable TX/RX, clear auto RS485 and RTS invert */ sc16is7xx_port_update(port, SC16IS7XX_EFCR_REG, SC16IS7XX_EFCR_RXDISABLE_BIT | - SC16IS7XX_EFCR_TXDISABLE_BIT, + SC16IS7XX_EFCR_TXDISABLE_BIT | + SC16IS7XX_EFCR_AUTO_RS485_BIT | + SC16IS7XX_EFCR_RTS_INVERT_BIT, SC16IS7XX_EFCR_RXDISABLE_BIT | SC16IS7XX_EFCR_TXDISABLE_BIT); From 3f6d5cb0a5e53ccb6a6deaa2e8b1762c4bb38e3a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 16 Dec 2021 13:42:26 +0530 Subject: [PATCH 0375/1056] bus: mhi: core: Use correctly sized arguments for bit field [ Upstream commit 5a717e93239fc373a314e03e45c43b62ebea1b26 ] The find.h APIs are designed to be used only on unsigned long arguments. This can technically result in a over-read, but it is harmless in this case. Regardless, fix it to avoid the warning seen under -Warray-bounds, which we'd like to enable globally: In file included from ./include/linux/bitmap.h:9, from ./include/linux/cpumask.h:12, from ./arch/x86/include/asm/cpumask.h:5, from ./arch/x86/include/asm/msr.h:11, from ./arch/x86/include/asm/processor.h:22, from ./arch/x86/include/asm/cpufeature.h:5, from ./arch/x86/include/asm/thread_info.h:53, from ./include/linux/thread_info.h:60, from ./arch/x86/include/asm/preempt.h:7, from ./include/linux/preempt.h:78, from ./include/linux/spinlock.h:55, from ./include/linux/wait.h:9, from ./include/linux/wait_bit.h:8, from ./include/linux/fs.h:6, from ./include/linux/debugfs.h:15, from drivers/bus/mhi/core/init.c:7: drivers/bus/mhi/core/init.c: In function 'to_mhi_pm_state_str': ./include/linux/find.h:187:37: warning: array subscript 'long unsigned int[0]' is partly outside array bounds of 'enum mhi_pm_state[1]' [-Warray-bounds] 187 | unsigned long val = *addr & GENMASK(size - 1, 0); | ^~~~~ drivers/bus/mhi/core/init.c:80:51: note: while referencing 'state' 80 | const char *to_mhi_pm_state_str(enum mhi_pm_state state) | ~~~~~~~~~~~~~~~~~~^~~~~ Link: https://lore.kernel.org/r/20211215232446.2069794-1-keescook@chromium.org [mani: changed the variable name "bits" to "pm_state"] Reviewed-by: Manivannan Sadhasivam Signed-off-by: Kees Cook Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20211216081227.237749-10-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/bus/mhi/core/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c index 4183945fc2c414..c0187367ae7520 100644 --- a/drivers/bus/mhi/core/init.c +++ b/drivers/bus/mhi/core/init.c @@ -79,7 +79,8 @@ static const char * const mhi_pm_state_str[] = { const char *to_mhi_pm_state_str(enum mhi_pm_state state) { - int index = find_last_bit((unsigned long *)&state, 32); + unsigned long pm_state = state; + int index = find_last_bit(&pm_state, 32); if (index >= ARRAY_SIZE(mhi_pm_state_str)) return "Invalid State"; From c2f3dab1ac54c5a6eb7f633e7d9205269306cc21 Mon Sep 17 00:00:00 2001 From: Paul Davey Date: Tue, 1 Mar 2022 21:33:00 +0530 Subject: [PATCH 0376/1056] bus: mhi: Fix pm_state conversion to string [ Upstream commit 64f93a9a27c1970fa8ee5ffc5a6ae2bda477ec5b ] On big endian architectures the mhi debugfs files which report pm state give "Invalid State" for all states. This is caused by using find_last_bit which takes an unsigned long* while the state is passed in as an enum mhi_pm_state which will be of int size. Fix by using __fls to pass the value of state instead of find_last_bit. Also the current API expects "mhi_pm_state" enumerator as the function argument but the function only works with bitmasks. So as Alex suggested, let's change the argument to u32 to avoid confusion. Fixes: a6e2e3522f29 ("bus: mhi: core: Add support for PM state transitions") Cc: stable@vger.kernel.org [mani: changed the function argument to u32] Reviewed-by: Manivannan Sadhasivam Reviewed-by: Hemant Kumar Reviewed-by: Alex Elder Signed-off-by: Paul Davey Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20220301160308.107452-3-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/bus/mhi/core/init.c | 10 ++++++---- drivers/bus/mhi/core/internal.h | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c index c0187367ae7520..d8787aaa176ba8 100644 --- a/drivers/bus/mhi/core/init.c +++ b/drivers/bus/mhi/core/init.c @@ -77,12 +77,14 @@ static const char * const mhi_pm_state_str[] = { [MHI_PM_STATE_LD_ERR_FATAL_DETECT] = "Linkdown or Error Fatal Detect", }; -const char *to_mhi_pm_state_str(enum mhi_pm_state state) +const char *to_mhi_pm_state_str(u32 state) { - unsigned long pm_state = state; - int index = find_last_bit(&pm_state, 32); + int index; - if (index >= ARRAY_SIZE(mhi_pm_state_str)) + if (state) + index = __fls(state); + + if (!state || index >= ARRAY_SIZE(mhi_pm_state_str)) return "Invalid State"; return mhi_pm_state_str[index]; diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h index c02c4d48b744c5..71f181402be989 100644 --- a/drivers/bus/mhi/core/internal.h +++ b/drivers/bus/mhi/core/internal.h @@ -622,7 +622,7 @@ void mhi_free_bhie_table(struct mhi_controller *mhi_cntrl, enum mhi_pm_state __must_check mhi_tryset_pm_state( struct mhi_controller *mhi_cntrl, enum mhi_pm_state state); -const char *to_mhi_pm_state_str(enum mhi_pm_state state); +const char *to_mhi_pm_state_str(u32 state); int mhi_queue_state_transition(struct mhi_controller *mhi_cntrl, enum dev_st_transition state); void mhi_pm_st_worker(struct work_struct *work); From 1d9bd723e7b41121e27c3faec0144b75434eec9b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 9 Aug 2021 11:21:23 -0700 Subject: [PATCH 0377/1056] stddef: Introduce DECLARE_FLEX_ARRAY() helper [ Upstream commit 3080ea5553cc909b000d1f1d964a9041962f2c5b ] There are many places where kernel code wants to have several different typed trailing flexible arrays. This would normally be done with multiple flexible arrays in a union, but since GCC and Clang don't (on the surface) allow this, there have been many open-coded workarounds, usually involving neighboring 0-element arrays at the end of a structure. For example, instead of something like this: struct thing { ... union { struct type1 foo[]; struct type2 bar[]; }; }; code works around the compiler with: struct thing { ... struct type1 foo[0]; struct type2 bar[]; }; Another case is when a flexible array is wanted as the single member within a struct (which itself is usually in a union). For example, this would be worked around as: union many { ... struct { struct type3 baz[0]; }; }; These kinds of work-arounds cause problems with size checks against such zero-element arrays (for example when building with -Warray-bounds and -Wzero-length-bounds, and with the coming FORTIFY_SOURCE improvements), so they must all be converted to "real" flexible arrays, avoiding warnings like this: fs/hpfs/anode.c: In function 'hpfs_add_sector_to_btree': fs/hpfs/anode.c:209:27: warning: array subscript 0 is outside the bounds of an interior zero-length array 'struct bplus_internal_node[0]' [-Wzero-length-bounds] 209 | anode->btree.u.internal[0].down = cpu_to_le32(a); | ~~~~~~~~~~~~~~~~~~~~~~~^~~ In file included from fs/hpfs/hpfs_fn.h:26, from fs/hpfs/anode.c:10: fs/hpfs/hpfs.h:412:32: note: while referencing 'internal' 412 | struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving | ^~~~~~~~ drivers/net/can/usb/etas_es58x/es58x_fd.c: In function 'es58x_fd_tx_can_msg': drivers/net/can/usb/etas_es58x/es58x_fd.c:360:35: warning: array subscript 65535 is outside the bounds of an interior zero-length array 'u8[0]' {aka 'unsigned char[]'} [-Wzero-length-bounds] 360 | tx_can_msg = (typeof(tx_can_msg))&es58x_fd_urb_cmd->raw_msg[msg_len]; | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from drivers/net/can/usb/etas_es58x/es58x_core.h:22, from drivers/net/can/usb/etas_es58x/es58x_fd.c:17: drivers/net/can/usb/etas_es58x/es58x_fd.h:231:6: note: while referencing 'raw_msg' 231 | u8 raw_msg[0]; | ^~~~~~~ However, it _is_ entirely possible to have one or more flexible arrays in a struct or union: it just has to be in another struct. And since it cannot be alone in a struct, such a struct must have at least 1 other named member -- but that member can be zero sized. Wrap all this nonsense into the new DECLARE_FLEX_ARRAY() in support of having flexible arrays in unions (or alone in a struct). As with struct_group(), since this is needed in UAPI headers as well, implement the core there, with a non-UAPI wrapper. Additionally update kernel-doc to understand its existence. https://github.com/KSPP/linux/issues/137 Cc: Arnd Bergmann Cc: "Gustavo A. R. Silva" Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- include/linux/stddef.h | 13 +++++++++++++ include/uapi/linux/stddef.h | 16 ++++++++++++++++ scripts/kernel-doc | 2 ++ 3 files changed, 31 insertions(+) diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 938216f8ab7e7c..31fdbb784c24e2 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -84,4 +84,17 @@ enum { #define struct_group_tagged(TAG, NAME, MEMBERS...) \ __struct_group(TAG, NAME, /* no attrs */, MEMBERS) +/** + * DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union + * + * @TYPE: The type of each flexible array element + * @NAME: The name of the flexible array member + * + * In order to have a flexible array member in a union or alone in a + * struct, it needs to be wrapped in an anonymous struct with at least 1 + * named member, but that member can be empty. + */ +#define DECLARE_FLEX_ARRAY(TYPE, NAME) \ + __DECLARE_FLEX_ARRAY(TYPE, NAME) + #endif diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 610204f7c275d5..3021ea25a2849e 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -25,3 +25,19 @@ struct { MEMBERS } ATTRS; \ struct TAG { MEMBERS } ATTRS NAME; \ } + +/** + * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union + * + * @TYPE: The type of each flexible array element + * @NAME: The name of the flexible array member + * + * In order to have a flexible array member in a union or alone in a + * struct, it needs to be wrapped in an anonymous struct with at least 1 + * named member, but that member can be empty. + */ +#define __DECLARE_FLEX_ARRAY(TYPE, NAME) \ + struct { \ + struct { } __empty_ ## NAME; \ + TYPE NAME[]; \ + } diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 38aa799a776c0d..5d54b57ff90cc9 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1263,6 +1263,8 @@ sub dump_struct($$) { $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; # replace DECLARE_KFIFO_PTR $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; + # replace DECLARE_FLEX_ARRAY + $members =~ s/(?:__)?DECLARE_FLEX_ARRAY\s*\($args,\s*$args\)/$1 $2\[\]/gos; my $declaration = $members; # Split nested struct/union elements as newer ones From 121af0231f82c3cd79308da28c151f7db59945da Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 29 Mar 2022 10:12:52 -0700 Subject: [PATCH 0378/1056] uapi/linux/stddef.h: Add include guards [ Upstream commit 55037ed7bdc62151a726f5685f88afa6a82959b1 ] Add include guard wrapper define to uapi/linux/stddef.h to prevent macro redefinition errors when stddef.h is included more than once. This was not needed before since the only contents already used a redefinition test. Signed-off-by: Tadeusz Struk Link: https://lore.kernel.org/r/20220329171252.57279-1-tadeusz.struk@linaro.org Fixes: 50d7bd38c3aa ("stddef: Introduce struct_group() helper macro") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- include/uapi/linux/stddef.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 3021ea25a2849e..7837ba4fe72890 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_STDDEF_H +#define _UAPI_LINUX_STDDEF_H + #include #ifndef __always_inline @@ -41,3 +44,4 @@ struct { } __empty_ ## NAME; \ TYPE NAME[]; \ } +#endif From c0058893a4a9deffcea9965296da77406bad2a24 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Wed, 29 Sep 2021 13:43:44 +0800 Subject: [PATCH 0379/1056] ASoC: rt5682: move clk related code to rt5682_i2c_probe [ Upstream commit 57589f82762e40bdaa975d840fa2bc5157b5be95 ] The DAI clock is only used in I2S mode, to make it clear and to fix clock resource release issue, we move CCF clock related code to rt5682_i2c_probe to fix clock register/unregister issue. Signed-off-by: Jack Yu Link: https://lore.kernel.org/r/20210929054344.12112-1-jack.yu@realtek.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5682-i2c.c | 22 +++++++++++ sound/soc/codecs/rt5682.c | 70 +++++++++++++---------------------- sound/soc/codecs/rt5682.h | 3 ++ 3 files changed, 51 insertions(+), 44 deletions(-) diff --git a/sound/soc/codecs/rt5682-i2c.c b/sound/soc/codecs/rt5682-i2c.c index 74a1fee071dd6e..3d2d7c9ce66df6 100644 --- a/sound/soc/codecs/rt5682-i2c.c +++ b/sound/soc/codecs/rt5682-i2c.c @@ -133,6 +133,8 @@ static int rt5682_i2c_probe(struct i2c_client *i2c, i2c_set_clientdata(i2c, rt5682); + rt5682->i2c_dev = &i2c->dev; + rt5682->pdata = i2s_default_platform_data; if (pdata) @@ -270,6 +272,26 @@ static int rt5682_i2c_probe(struct i2c_client *i2c, dev_err(&i2c->dev, "Failed to reguest IRQ: %d\n", ret); } +#ifdef CONFIG_COMMON_CLK + /* Check if MCLK provided */ + rt5682->mclk = devm_clk_get(&i2c->dev, "mclk"); + if (IS_ERR(rt5682->mclk)) { + if (PTR_ERR(rt5682->mclk) != -ENOENT) { + ret = PTR_ERR(rt5682->mclk); + return ret; + } + rt5682->mclk = NULL; + } + + /* Register CCF DAI clock control */ + ret = rt5682_register_dai_clks(rt5682); + if (ret) + return ret; + + /* Initial setup for CCF */ + rt5682->lrck[RT5682_AIF1] = 48000; +#endif + return devm_snd_soc_register_component(&i2c->dev, &rt5682_soc_component_dev, rt5682_dai, ARRAY_SIZE(rt5682_dai)); diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 1cd59e166ccece..80d199843b8c53 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -2562,7 +2562,7 @@ static int rt5682_set_bias_level(struct snd_soc_component *component, static bool rt5682_clk_check(struct rt5682_priv *rt5682) { if (!rt5682->master[RT5682_AIF1]) { - dev_dbg(rt5682->component->dev, "sysclk/dai not set correctly\n"); + dev_dbg(rt5682->i2c_dev, "sysclk/dai not set correctly\n"); return false; } return true; @@ -2573,13 +2573,15 @@ static int rt5682_wclk_prepare(struct clk_hw *hw) struct rt5682_priv *rt5682 = container_of(hw, struct rt5682_priv, dai_clks_hw[RT5682_DAI_WCLK_IDX]); - struct snd_soc_component *component = rt5682->component; - struct snd_soc_dapm_context *dapm = - snd_soc_component_get_dapm(component); + struct snd_soc_component *component; + struct snd_soc_dapm_context *dapm; if (!rt5682_clk_check(rt5682)) return -EINVAL; + component = rt5682->component; + dapm = snd_soc_component_get_dapm(component); + snd_soc_dapm_mutex_lock(dapm); snd_soc_dapm_force_enable_pin_unlocked(dapm, "MICBIAS"); @@ -2609,13 +2611,15 @@ static void rt5682_wclk_unprepare(struct clk_hw *hw) struct rt5682_priv *rt5682 = container_of(hw, struct rt5682_priv, dai_clks_hw[RT5682_DAI_WCLK_IDX]); - struct snd_soc_component *component = rt5682->component; - struct snd_soc_dapm_context *dapm = - snd_soc_component_get_dapm(component); + struct snd_soc_component *component; + struct snd_soc_dapm_context *dapm; if (!rt5682_clk_check(rt5682)) return; + component = rt5682->component; + dapm = snd_soc_component_get_dapm(component); + snd_soc_dapm_mutex_lock(dapm); snd_soc_dapm_disable_pin_unlocked(dapm, "MICBIAS"); @@ -2639,7 +2643,6 @@ static unsigned long rt5682_wclk_recalc_rate(struct clk_hw *hw, struct rt5682_priv *rt5682 = container_of(hw, struct rt5682_priv, dai_clks_hw[RT5682_DAI_WCLK_IDX]); - struct snd_soc_component *component = rt5682->component; const char * const clk_name = clk_hw_get_name(hw); if (!rt5682_clk_check(rt5682)) @@ -2649,7 +2652,7 @@ static unsigned long rt5682_wclk_recalc_rate(struct clk_hw *hw, */ if (rt5682->lrck[RT5682_AIF1] != CLK_48 && rt5682->lrck[RT5682_AIF1] != CLK_44) { - dev_warn(component->dev, "%s: clk %s only support %d or %d Hz output\n", + dev_warn(rt5682->i2c_dev, "%s: clk %s only support %d or %d Hz output\n", __func__, clk_name, CLK_44, CLK_48); return 0; } @@ -2663,7 +2666,6 @@ static long rt5682_wclk_round_rate(struct clk_hw *hw, unsigned long rate, struct rt5682_priv *rt5682 = container_of(hw, struct rt5682_priv, dai_clks_hw[RT5682_DAI_WCLK_IDX]); - struct snd_soc_component *component = rt5682->component; const char * const clk_name = clk_hw_get_name(hw); if (!rt5682_clk_check(rt5682)) @@ -2673,7 +2675,7 @@ static long rt5682_wclk_round_rate(struct clk_hw *hw, unsigned long rate, * It will force to 48kHz if not both. */ if (rate != CLK_48 && rate != CLK_44) { - dev_warn(component->dev, "%s: clk %s only support %d or %d Hz output\n", + dev_warn(rt5682->i2c_dev, "%s: clk %s only support %d or %d Hz output\n", __func__, clk_name, CLK_44, CLK_48); rate = CLK_48; } @@ -2687,7 +2689,7 @@ static int rt5682_wclk_set_rate(struct clk_hw *hw, unsigned long rate, struct rt5682_priv *rt5682 = container_of(hw, struct rt5682_priv, dai_clks_hw[RT5682_DAI_WCLK_IDX]); - struct snd_soc_component *component = rt5682->component; + struct snd_soc_component *component; struct clk_hw *parent_hw; const char * const clk_name = clk_hw_get_name(hw); int pre_div; @@ -2696,6 +2698,8 @@ static int rt5682_wclk_set_rate(struct clk_hw *hw, unsigned long rate, if (!rt5682_clk_check(rt5682)) return -EINVAL; + component = rt5682->component; + /* * Whether the wclk's parent clk (mclk) exists or not, please ensure * it is fixed or set to 48MHz before setting wclk rate. It's a @@ -2705,12 +2709,12 @@ static int rt5682_wclk_set_rate(struct clk_hw *hw, unsigned long rate, */ parent_hw = clk_hw_get_parent(hw); if (!parent_hw) - dev_warn(component->dev, + dev_warn(rt5682->i2c_dev, "Parent mclk of wclk not acquired in driver. Please ensure mclk was provided as %d Hz.\n", CLK_PLL2_FIN); if (parent_rate != CLK_PLL2_FIN) - dev_warn(component->dev, "clk %s only support %d Hz input\n", + dev_warn(rt5682->i2c_dev, "clk %s only support %d Hz input\n", clk_name, CLK_PLL2_FIN); /* @@ -2742,10 +2746,9 @@ static unsigned long rt5682_bclk_recalc_rate(struct clk_hw *hw, struct rt5682_priv *rt5682 = container_of(hw, struct rt5682_priv, dai_clks_hw[RT5682_DAI_BCLK_IDX]); - struct snd_soc_component *component = rt5682->component; unsigned int bclks_per_wclk; - bclks_per_wclk = snd_soc_component_read(component, RT5682_TDM_TCON_CTRL); + regmap_read(rt5682->regmap, RT5682_TDM_TCON_CTRL, &bclks_per_wclk); switch (bclks_per_wclk & RT5682_TDM_BCLK_MS1_MASK) { case RT5682_TDM_BCLK_MS1_256: @@ -2806,20 +2809,22 @@ static int rt5682_bclk_set_rate(struct clk_hw *hw, unsigned long rate, struct rt5682_priv *rt5682 = container_of(hw, struct rt5682_priv, dai_clks_hw[RT5682_DAI_BCLK_IDX]); - struct snd_soc_component *component = rt5682->component; + struct snd_soc_component *component; struct snd_soc_dai *dai; unsigned long factor; if (!rt5682_clk_check(rt5682)) return -EINVAL; + component = rt5682->component; + factor = rt5682_bclk_get_factor(rate, parent_rate); for_each_component_dais(component, dai) if (dai->id == RT5682_AIF1) break; if (!dai) { - dev_err(component->dev, "dai %d not found in component\n", + dev_err(rt5682->i2c_dev, "dai %d not found in component\n", RT5682_AIF1); return -ENODEV; } @@ -2842,10 +2847,9 @@ static const struct clk_ops rt5682_dai_clk_ops[RT5682_DAI_NUM_CLKS] = { }, }; -static int rt5682_register_dai_clks(struct snd_soc_component *component) +int rt5682_register_dai_clks(struct rt5682_priv *rt5682) { - struct device *dev = component->dev; - struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component); + struct device *dev = rt5682->i2c_dev; struct rt5682_platform_data *pdata = &rt5682->pdata; struct clk_hw *dai_clk_hw; int i, ret; @@ -2905,6 +2909,7 @@ static int rt5682_register_dai_clks(struct snd_soc_component *component) return 0; } +EXPORT_SYMBOL_GPL(rt5682_register_dai_clks); #endif /* CONFIG_COMMON_CLK */ static int rt5682_probe(struct snd_soc_component *component) @@ -2914,9 +2919,6 @@ static int rt5682_probe(struct snd_soc_component *component) unsigned long time; struct snd_soc_dapm_context *dapm = &component->dapm; -#ifdef CONFIG_COMMON_CLK - int ret; -#endif rt5682->component = component; if (rt5682->is_sdw) { @@ -2928,26 +2930,6 @@ static int rt5682_probe(struct snd_soc_component *component) dev_err(&slave->dev, "Initialization not complete, timed out\n"); return -ETIMEDOUT; } - } else { -#ifdef CONFIG_COMMON_CLK - /* Check if MCLK provided */ - rt5682->mclk = devm_clk_get(component->dev, "mclk"); - if (IS_ERR(rt5682->mclk)) { - if (PTR_ERR(rt5682->mclk) != -ENOENT) { - ret = PTR_ERR(rt5682->mclk); - return ret; - } - rt5682->mclk = NULL; - } - - /* Register CCF DAI clock control */ - ret = rt5682_register_dai_clks(component); - if (ret) - return ret; - - /* Initial setup for CCF */ - rt5682->lrck[RT5682_AIF1] = CLK_48; -#endif } snd_soc_dapm_disable_pin(dapm, "MICBIAS"); diff --git a/sound/soc/codecs/rt5682.h b/sound/soc/codecs/rt5682.h index 539a9fe26294a8..52ff0d9c36c581 100644 --- a/sound/soc/codecs/rt5682.h +++ b/sound/soc/codecs/rt5682.h @@ -1428,6 +1428,7 @@ enum { struct rt5682_priv { struct snd_soc_component *component; + struct device *i2c_dev; struct rt5682_platform_data pdata; struct regmap *regmap; struct regmap *sdw_regmap; @@ -1481,6 +1482,8 @@ void rt5682_calibrate(struct rt5682_priv *rt5682); void rt5682_reset(struct rt5682_priv *rt5682); int rt5682_parse_dt(struct rt5682_priv *rt5682, struct device *dev); +int rt5682_register_dai_clks(struct rt5682_priv *rt5682); + #define RT5682_REG_NUM 318 extern const struct reg_default rt5682_reg[RT5682_REG_NUM]; From a976456c797cf3474d5f8853b1d38ffe3b76f1b2 Mon Sep 17 00:00:00 2001 From: Xiaomeng Tong Date: Sun, 27 Mar 2022 16:10:02 +0800 Subject: [PATCH 0380/1056] ASoC: rt5682: fix an incorrect NULL check on list iterator [ Upstream commit c8618d65007ba68d7891130642d73e89372101e8 ] The bug is here: if (!dai) { The list iterator value 'dai' will *always* be set and non-NULL by for_each_component_dais(), so it is incorrect to assume that the iterator value will be NULL if the list is empty or no element is found (In fact, it will be a bogus pointer to an invalid struct object containing the HEAD). Otherwise it will bypass the check 'if (!dai) {' (never call dev_err() and never return -ENODEV;) and lead to invalid memory access lately when calling 'rt5682_set_bclk1_ratio(dai, factor);'. To fix the bug, just return rt5682_set_bclk1_ratio(dai, factor); when found the 'dai', otherwise dev_err() and return -ENODEV; Cc: stable@vger.kernel.org Fixes: ebbfabc16d23d ("ASoC: rt5682: Add CCF usage for providing I2S clks") Signed-off-by: Xiaomeng Tong Link: https://lore.kernel.org/r/20220327081002.12684-1-xiam0nd.tong@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5682.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 80d199843b8c53..8a9e1a4fa03ea2 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -2822,14 +2822,11 @@ static int rt5682_bclk_set_rate(struct clk_hw *hw, unsigned long rate, for_each_component_dais(component, dai) if (dai->id == RT5682_AIF1) - break; - if (!dai) { - dev_err(rt5682->i2c_dev, "dai %d not found in component\n", - RT5682_AIF1); - return -ENODEV; - } + return rt5682_set_bclk1_ratio(dai, factor); - return rt5682_set_bclk1_ratio(dai, factor); + dev_err(rt5682->i2c_dev, "dai %d not found in component\n", + RT5682_AIF1); + return -ENODEV; } static const struct clk_ops rt5682_dai_clk_ops[RT5682_DAI_NUM_CLKS] = { From f3647c369c178c1cdea7f6a60dc32d6118afac40 Mon Sep 17 00:00:00 2001 From: tiancyin Date: Sun, 27 Mar 2022 19:07:13 +0800 Subject: [PATCH 0381/1056] drm/amd/vcn: fix an error msg on vcn 3.0 [ Upstream commit 425d7a87e54ee358f580eaf10cf28dc95f7121c1 ] Some video card has more than one vcn instance, passing 0 to vcn_v3_0_pause_dpg_mode is incorrect. Error msg: Register(1) [mmUVD_POWER_STATUS] failed to reach value 0x00000001 != 0x00000002 Reviewed-by: James Zhu Signed-off-by: tiancyin Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 6e56bef4fdf811..1310617f030f72 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1511,7 +1511,7 @@ static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; - vcn_v3_0_pause_dpg_mode(adev, 0, &state); + vcn_v3_0_pause_dpg_mode(adev, inst_idx, &state); /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, From e73c0eaf7f3538a23c49d377a4d56758ab269834 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 6 Apr 2022 23:56:13 +0000 Subject: [PATCH 0382/1056] KVM: Don't create VM debugfs files outside of the VM directory [ Upstream commit a44a4cc1c969afec97dbb2aedaf6f38eaa6253bb ] Unfortunately, there is no guarantee that KVM was able to instantiate a debugfs directory for a particular VM. To that end, KVM shouldn't even attempt to create new debugfs files in this case. If the specified parent dentry is NULL, debugfs_create_file() will instantiate files at the root of debugfs. For arm64, it is possible to create the vgic-state file outside of a VM directory, the file is not cleaned up when a VM is destroyed. Nonetheless, the corresponding struct kvm is freed when the VM is destroyed. Nip the problem in the bud for all possible errant debugfs file creations by initializing kvm->debugfs_dentry to -ENOENT. In so doing, debugfs_create_file() will fail instead of creating the file in the root directory. Cc: stable@kernel.org Fixes: 929f45e32499 ("kvm: no need to check return value of debugfs_create functions") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220406235615.1447180-2-oupton@google.com Signed-off-by: Sasha Levin --- virt/kvm/kvm_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 99c5915698156a..9134ae252d7c28 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -911,7 +911,7 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm) int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc + kvm_vcpu_stats_header.num_desc; - if (!kvm->debugfs_dentry) + if (IS_ERR(kvm->debugfs_dentry)) return; debugfs_remove_recursive(kvm->debugfs_dentry); @@ -934,6 +934,12 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc + kvm_vcpu_stats_header.num_desc; + /* + * Force subsequent debugfs file creations to fail if the VM directory + * is not created. + */ + kvm->debugfs_dentry = ERR_PTR(-ENOENT); + if (!debugfs_initialized()) return 0; @@ -5373,7 +5379,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) } add_uevent_var(env, "PID=%d", kvm->userspace_pid); - if (kvm->debugfs_dentry) { + if (!IS_ERR(kvm->debugfs_dentry)) { char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); if (p) { From 88a4fb1346b3b11af33762522b954d5af09f3335 Mon Sep 17 00:00:00 2001 From: Zhenguo Zhao Date: Fri, 20 Aug 2021 20:17:47 +0800 Subject: [PATCH 0383/1056] tty: n_gsm: Modify CR,PF bit when config requester [ Upstream commit cc0f42122a7e7a5ede9c5f2a41199128b8449eda ] When n_gsm config "initiator=0",as requester,gsmld receives dlci SABM/DISC control command frame,but send UA frame is error. Example: Gsmld receive dlc0 SABM frame "f9 03 3f 01 1c f9",now it sends UA frame "f9 01 63 01 a3 f9",CR and PF bit are 0,but it should be set 1 from requster to initiator. Kernel test log as follows: Before modify [ 271.732031] c1 gsmld_receive: 00000000: f9 03 3f 01 1c f9 [ 271.741719] c1 <-- 0) C: SABM(P) [ 271.749483] c1 gsmld_output: 00000000: f9 01 63 01 a3 f9 [ 271.758337] c1 --> 0) R: UA(F) After modify [ 261.233188] c0 gsmld_receive: 00000000: f9 03 3f 01 1c f9 [ 261.242767] c0 <-- 0) C: SABM(P) [ 261.250497] c0 gsmld_output: 00000000: f9 03 73 01 d7 f9 [ 261.259759] c0 --> 0) C: UA(P) Signed-off-by: Zhenguo Zhao Link: https://lore.kernel.org/r/1629461872-26965-3-git-send-email-zhenguo6858@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/n_gsm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 6734ef22c304a2..91ce8e6e889aa3 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -625,7 +625,7 @@ static void gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) static inline void gsm_response(struct gsm_mux *gsm, int addr, int control) { - gsm_send(gsm, addr, 0, control); + gsm_send(gsm, addr, 1, control); } /** @@ -1818,9 +1818,9 @@ static void gsm_queue(struct gsm_mux *gsm) if (dlci == NULL) return; if (dlci->dead) - gsm_response(gsm, address, DM); + gsm_response(gsm, address, DM|PF); else { - gsm_response(gsm, address, UA); + gsm_response(gsm, address, UA|PF); gsm_dlci_open(dlci); } break; @@ -1828,11 +1828,11 @@ static void gsm_queue(struct gsm_mux *gsm) if (cr == 0) goto invalid; if (dlci == NULL || dlci->state == DLCI_CLOSED) { - gsm_response(gsm, address, DM); + gsm_response(gsm, address, DM|PF); return; } /* Real close complete */ - gsm_response(gsm, address, UA); + gsm_response(gsm, address, UA|PF); gsm_dlci_close(dlci); break; case UA|PF: From 375dfcfca4a1bcd190a90c9cb0770f7b025f1001 Mon Sep 17 00:00:00 2001 From: Zhenguo Zhao Date: Fri, 20 Aug 2021 20:17:52 +0800 Subject: [PATCH 0384/1056] tty: n_gsm: Save dlci address open status when config requester [ Upstream commit 0b91b5332368f2fb0c3e5cfebc6aff9e167acd8b ] When n_gsm config "initiator=0",as requester ,receive SABM frame,n_gsm register gsmtty dev,and save dlci open address status,if receive DLC0 DISC or CLD frame,it can unregister the gsmtty dev by saving dlci address. Signed-off-by: Zhenguo Zhao Link: https://lore.kernel.org/r/1629461872-26965-8-git-send-email-zhenguo6858@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/n_gsm.c | 57 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 91ce8e6e889aa3..3038e5631be558 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -274,6 +274,10 @@ static DEFINE_SPINLOCK(gsm_mux_lock); static struct tty_driver *gsm_tty_driver; +/* Save dlci open address */ +static int addr_open[256] = { 0 }; +/* Save dlci open count */ +static int addr_cnt; /* * This section of the driver logic implements the GSM encodings * both the basic and the 'advanced'. Reliable transport is not @@ -1191,6 +1195,7 @@ static void gsm_control_rls(struct gsm_mux *gsm, const u8 *data, int clen) } static void gsm_dlci_begin_close(struct gsm_dlci *dlci); +static void gsm_dlci_close(struct gsm_dlci *dlci); /** * gsm_control_message - DLCI 0 control processing @@ -1209,15 +1214,28 @@ static void gsm_control_message(struct gsm_mux *gsm, unsigned int command, { u8 buf[1]; unsigned long flags; + struct gsm_dlci *dlci; + int i; + int address; switch (command) { case CMD_CLD: { - struct gsm_dlci *dlci = gsm->dlci[0]; + if (addr_cnt > 0) { + for (i = 0; i < addr_cnt; i++) { + address = addr_open[i]; + dlci = gsm->dlci[address]; + gsm_dlci_close(dlci); + addr_open[i] = 0; + } + } /* Modem wishes to close down */ + dlci = gsm->dlci[0]; if (dlci) { dlci->dead = true; gsm->dead = true; - gsm_dlci_begin_close(dlci); + gsm_dlci_close(dlci); + addr_cnt = 0; + gsm_response(gsm, 0, UA|PF); } } break; @@ -1780,6 +1798,7 @@ static void gsm_queue(struct gsm_mux *gsm) struct gsm_dlci *dlci; u8 cr; int address; + int i, j, k, address_tmp; /* We have to sneak a look at the packet body to do the FCS. A somewhat layering violation in the spec */ @@ -1822,6 +1841,11 @@ static void gsm_queue(struct gsm_mux *gsm) else { gsm_response(gsm, address, UA|PF); gsm_dlci_open(dlci); + /* Save dlci open address */ + if (address) { + addr_open[addr_cnt] = address; + addr_cnt++; + } } break; case DISC|PF: @@ -1832,8 +1856,33 @@ static void gsm_queue(struct gsm_mux *gsm) return; } /* Real close complete */ - gsm_response(gsm, address, UA|PF); - gsm_dlci_close(dlci); + if (!address) { + if (addr_cnt > 0) { + for (i = 0; i < addr_cnt; i++) { + address = addr_open[i]; + dlci = gsm->dlci[address]; + gsm_dlci_close(dlci); + addr_open[i] = 0; + } + } + dlci = gsm->dlci[0]; + gsm_dlci_close(dlci); + addr_cnt = 0; + gsm_response(gsm, 0, UA|PF); + } else { + gsm_response(gsm, address, UA|PF); + gsm_dlci_close(dlci); + /* clear dlci address */ + for (j = 0; j < addr_cnt; j++) { + address_tmp = addr_open[j]; + if (address_tmp == address) { + for (k = j; k < addr_cnt; k++) + addr_open[k] = addr_open[k+1]; + addr_cnt--; + break; + } + } + } break; case UA|PF: if (cr == 0 || dlci == NULL) From 6dcf1e5581b4014db03d7fd2508798964e88f663 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:11 -0700 Subject: [PATCH 0385/1056] tty: n_gsm: fix frame reception handling [ Upstream commit 7a0e4b1733b635026a87c023f6d703faf0095e39 ] The frame checksum (FCS) is currently handled in gsm_queue() after reception of a frame. However, this breaks layering. A workaround with 'received_fcs' was implemented so far. Furthermore, frames are handled as such even if no end flag was received. Move FCS calculation from gsm_queue() to gsm0_receive() and gsm1_receive(). Also delay gsm_queue() call there until a full frame was received to fix both points. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-6-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/n_gsm.c | 53 +++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 3038e5631be558..d3d5308daf35bf 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -221,7 +221,6 @@ struct gsm_mux { int encoding; u8 control; u8 fcs; - u8 received_fcs; u8 *txframe; /* TX framing buffer */ /* Method for the receiver side */ @@ -1799,18 +1798,7 @@ static void gsm_queue(struct gsm_mux *gsm) u8 cr; int address; int i, j, k, address_tmp; - /* We have to sneak a look at the packet body to do the FCS. - A somewhat layering violation in the spec */ - if ((gsm->control & ~PF) == UI) - gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, gsm->len); - if (gsm->encoding == 0) { - /* WARNING: gsm->received_fcs is used for - gsm->encoding = 0 only. - In this case it contain the last piece of data - required to generate final CRC */ - gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->received_fcs); - } if (gsm->fcs != GOOD_FCS) { gsm->bad_fcs++; if (debug & 4) @@ -1997,19 +1985,25 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c) break; case GSM_DATA: /* Data */ gsm->buf[gsm->count++] = c; - if (gsm->count == gsm->len) + if (gsm->count == gsm->len) { + /* Calculate final FCS for UI frames over all data */ + if ((gsm->control & ~PF) != UIH) { + gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, + gsm->count); + } gsm->state = GSM_FCS; + } break; case GSM_FCS: /* FCS follows the packet */ - gsm->received_fcs = c; - gsm_queue(gsm); + gsm->fcs = gsm_fcs_add(gsm->fcs, c); gsm->state = GSM_SSOF; break; case GSM_SSOF: - if (c == GSM0_SOF) { - gsm->state = GSM_SEARCH; - break; - } + gsm->state = GSM_SEARCH; + if (c == GSM0_SOF) + gsm_queue(gsm); + else + gsm->bad_size++; break; default: pr_debug("%s: unhandled state: %d\n", __func__, gsm->state); @@ -2038,11 +2032,24 @@ static void gsm1_receive(struct gsm_mux *gsm, unsigned char c) return; } if (c == GSM1_SOF) { - /* EOF is only valid in frame if we have got to the data state - and received at least one byte (the FCS) */ - if (gsm->state == GSM_DATA && gsm->count) { - /* Extract the FCS */ + /* EOF is only valid in frame if we have got to the data state */ + if (gsm->state == GSM_DATA) { + if (gsm->count < 1) { + /* Missing FSC */ + gsm->malformed++; + gsm->state = GSM_START; + return; + } + /* Remove the FCS from data */ gsm->count--; + if ((gsm->control & ~PF) != UIH) { + /* Calculate final FCS for UI frames over all + * data but FCS + */ + gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, + gsm->count); + } + /* Add the FCS itself to test against GOOD_FCS */ gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->buf[gsm->count]); gsm->len = gsm->count; gsm_queue(gsm); From a7fe6934ce7c5d311f6da15fba18bd2a432fe655 Mon Sep 17 00:00:00 2001 From: Johannes Schickel Date: Sat, 15 Jan 2022 15:02:57 +0100 Subject: [PATCH 0386/1056] ALSA: usb-audio: add mapping for MSI MPG X570S Carbon Max Wifi. [ Upstream commit 5762f980ca10dcfe5eead7c40d1c34cae61f409b ] The USB audio device 0db0:419c based on the Realtek ALC4080 chip exposes all playback volume controls as "PCM". This is makes distinguishing the individual functions hard. The added mapping distinguishes all playback volume controls as their respective function: - Speaker - for back panel output - Frontpanel Headphone - for front panel output - IEC958 - for digital output on the back panel This clarifies the individual volume control functions for users. Signed-off-by: Johannes Schickel Link: https://lore.kernel.org/r/20220115140257.8751-1-lordhoto@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/mixer_maps.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 6ffd23f2ee65db..64fdca76b40e9f 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -423,6 +423,14 @@ static const struct usbmix_name_map aorus_master_alc1220vb_map[] = { {} }; +/* MSI MPG X570S Carbon Max Wifi with ALC4080 */ +static const struct usbmix_name_map msi_mpg_x570s_carbon_max_wifi_alc4080_map[] = { + { 29, "Speaker Playback" }, + { 30, "Front Headphone Playback" }, + { 32, "IEC958 Playback" }, + {} +}; + /* * Control map entries */ @@ -574,6 +582,10 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { .map = trx40_mobo_map, .connector_map = trx40_mobo_connector_map, }, + { /* MSI MPG X570S Carbon Max Wifi */ + .id = USB_ID(0x0db0, 0x419c), + .map = msi_mpg_x570s_carbon_max_wifi_alc4080_map, + }, { /* MSI TRX40 */ .id = USB_ID(0x0db0, 0x543d), .map = trx40_mobo_map, From 4db0a8dd906761bf7f14b40a267490c476651f69 Mon Sep 17 00:00:00 2001 From: Maurizio Avogadro Date: Mon, 18 Apr 2022 15:16:12 +0200 Subject: [PATCH 0387/1056] ALSA: usb-audio: add mapping for MSI MAG X570S Torpedo MAX. [ Upstream commit 4ddef9c4d70aae0c9029bdec7c3f7f1c1c51ff8c ] The USB audio device 0db0:a073 based on the Realtek ALC4080 chipset exposes all playback volume controls as "PCM". This makes distinguishing the individual functions hard. The mapping already adopted for device 0db0:419c based on the same chipset fixes the issue, apply it for this device too. Signed-off-by: Maurizio Avogadro Cc: Link: https://lore.kernel.org/r/Yl1ykPaGgsFf3SnW@ryzen Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/mixer_maps.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 64fdca76b40e9f..997425ef0a2946 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -586,6 +586,10 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x0db0, 0x419c), .map = msi_mpg_x570s_carbon_max_wifi_alc4080_map, }, + { /* MSI MAG X570S Torpedo Max */ + .id = USB_ID(0x0db0, 0xa073), + .map = msi_mpg_x570s_carbon_max_wifi_alc4080_map, + }, { /* MSI TRX40 */ .id = USB_ID(0x0db0, 0x543d), .map = trx40_mobo_map, From 920e849b7d23ced84c9d11e11e2449e34973cfb8 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Wed, 20 Apr 2022 03:13:44 -0700 Subject: [PATCH 0388/1056] tty: n_gsm: fix missing update of modem controls after DLCI open [ Upstream commit 48473802506d2d6151f59e0e764932b33b53cb3b ] Currently the peer is not informed about the initial state of the modem control lines after a new DLCI has been opened. Fix this by sending the initial modem control line states after DLCI open. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220420101346.3315-1-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/n_gsm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index d3d5308daf35bf..c52d5e0d5c6f5e 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -371,6 +371,7 @@ static const u8 gsm_fcs8[256] = { #define GOOD_FCS 0xCF static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len); +static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk); /** * gsm_fcs_add - update FCS @@ -1489,6 +1490,9 @@ static void gsm_dlci_open(struct gsm_dlci *dlci) dlci->state = DLCI_OPEN; if (debug & 8) pr_debug("DLCI %d goes open.\n", dlci->addr); + /* Send current modem state */ + if (dlci->addr) + gsmtty_modem_update(dlci, 0); wake_up(&dlci->gsm->event); } From 1519e6e28478ce7b7a31dde9ae1b0faed8854dc2 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 7 Dec 2021 06:28:34 -0800 Subject: [PATCH 0389/1056] btrfs: zoned: encapsulate inode locking for zoned relocation [ Upstream commit 869f4cdc73f9378986755030c684c011f0b71517 ] Encapsulate the inode lock needed for serializing the data relocation writes on a zoned filesystem into a helper. This streamlines the code reading flow and hides special casing for zoned filesystems. Reviewed-by: Josef Bacik Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/extent_io.c | 8 ++------ fs/btrfs/zoned.h | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6dd375ed6e3d55..059bd0753e27f6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5139,8 +5139,6 @@ int extent_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; - const bool data_reloc = btrfs_is_data_reloc_root(BTRFS_I(inode)->root); - const bool zoned = btrfs_is_zoned(BTRFS_I(inode)->root->fs_info); int ret = 0; struct extent_page_data epd = { .bio_ctrl = { 0 }, @@ -5152,11 +5150,9 @@ int extent_writepages(struct address_space *mapping, * Allow only a single thread to do the reloc work in zoned mode to * protect the write pointer updates. */ - if (data_reloc && zoned) - btrfs_inode_lock(inode, 0); + btrfs_zoned_data_reloc_lock(BTRFS_I(inode)); ret = extent_write_cache_pages(mapping, wbc, &epd); - if (data_reloc && zoned) - btrfs_inode_unlock(inode, 0); + btrfs_zoned_data_reloc_unlock(BTRFS_I(inode)); ASSERT(ret <= 0); if (ret < 0) { end_write_bio(&epd, ret); diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index 813aa3cddc11fc..d680c3ee918abd 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -8,6 +8,7 @@ #include "volumes.h" #include "disk-io.h" #include "block-group.h" +#include "btrfs_inode.h" /* * Block groups with more than this value (percents) of unusable space will be @@ -324,4 +325,20 @@ static inline void btrfs_clear_treelog_bg(struct btrfs_block_group *bg) spin_unlock(&fs_info->treelog_bg_lock); } +static inline void btrfs_zoned_data_reloc_lock(struct btrfs_inode *inode) +{ + struct btrfs_root *root = inode->root; + + if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info)) + btrfs_inode_lock(&inode->vfs_inode, 0); +} + +static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode) +{ + struct btrfs_root *root = inode->root; + + if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info)) + btrfs_inode_unlock(&inode->vfs_inode, 0); +} + #endif From 70e2e87ea878bda72c361131c0853fbd39162f09 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Mon, 18 Apr 2022 16:15:03 +0900 Subject: [PATCH 0390/1056] btrfs: zoned: use dedicated lock for data relocation [ Upstream commit 5f0addf7b89085f8e0a2593faa419d6111612b9b ] Currently, we use btrfs_inode_{lock,unlock}() to grant an exclusive writeback of the relocation data inode in btrfs_zoned_data_reloc_{lock,unlock}(). However, that can cause a deadlock in the following path. Thread A takes btrfs_inode_lock() and waits for metadata reservation by e.g, waiting for writeback: prealloc_file_extent_cluster() - btrfs_inode_lock(&inode->vfs_inode, 0); - btrfs_prealloc_file_range() ... - btrfs_replace_file_extents() - btrfs_start_transaction ... - btrfs_reserve_metadata_bytes() Thread B (e.g, doing a writeback work) needs to wait for the inode lock to continue writeback process: do_writepages - btrfs_writepages - extent_writpages - btrfs_zoned_data_reloc_lock(BTRFS_I(inode)); - btrfs_inode_lock() The deadlock is caused by relying on the vfs_inode's lock. By using it, we introduced unnecessary exclusion of writeback and btrfs_prealloc_file_range(). Also, the lock at this point is useless as we don't have any dirty pages in the inode yet. Introduce fs_info->zoned_data_reloc_io_lock and use it for the exclusive writeback. Fixes: 35156d852762 ("btrfs: zoned: only allow one process to add pages to a relocation inode") CC: stable@vger.kernel.org # 5.16.x: 869f4cdc73f9: btrfs: zoned: encapsulate inode locking for zoned relocation CC: stable@vger.kernel.org # 5.16.x CC: stable@vger.kernel.org # 5.17 Cc: Johannes Thumshirn Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/zoned.h | 4 ++-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index cc72d8981c4785..d1838de0b39c06 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1027,6 +1027,7 @@ struct btrfs_fs_info { */ spinlock_t relocation_bg_lock; u64 data_reloc_bg; + struct mutex zoned_data_reloc_io_lock; #ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 233d894f6febaf..909d19656316f5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2914,6 +2914,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); mutex_init(&fs_info->zoned_meta_io_lock); + mutex_init(&fs_info->zoned_data_reloc_io_lock); seqlock_init(&fs_info->profiles_lock); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index d680c3ee918abd..3a826f7c204030 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -330,7 +330,7 @@ static inline void btrfs_zoned_data_reloc_lock(struct btrfs_inode *inode) struct btrfs_root *root = inode->root; if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info)) - btrfs_inode_lock(&inode->vfs_inode, 0); + mutex_lock(&root->fs_info->zoned_data_reloc_io_lock); } static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode) @@ -338,7 +338,7 @@ static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode) struct btrfs_root *root = inode->root; if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info)) - btrfs_inode_unlock(&inode->vfs_inode, 0); + mutex_unlock(&root->fs_info->zoned_data_reloc_io_lock); } #endif From a0f4fd486896491637ff84e1591fb76ee6fee3ed Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 15 Apr 2022 00:46:22 +0000 Subject: [PATCH 0391/1056] KVM: Initialize debugfs_dentry when a VM is created to avoid NULL deref [ Upstream commit 5c697c367a66307a5d943c3449421aff2aa3ca4a ] Initialize debugfs_entry to its semi-magical -ENOENT value when the VM is created. KVM's teardown when VM creation fails is kludgy and calls kvm_uevent_notify_change() and kvm_destroy_vm_debugfs() even if KVM never attempted kvm_create_vm_debugfs(). Because debugfs_entry is zero initialized, the IS_ERR() checks pass and KVM derefs a NULL pointer. BUG: kernel NULL pointer dereference, address: 0000000000000018 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 1068b1067 P4D 1068b1067 PUD 1068b0067 PMD 0 Oops: 0000 [#1] SMP CPU: 0 PID: 871 Comm: repro Not tainted 5.18.0-rc1+ #825 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:__dentry_path+0x7b/0x130 Call Trace: dentry_path_raw+0x42/0x70 kvm_uevent_notify_change.part.0+0x10c/0x200 [kvm] kvm_put_kvm+0x63/0x2b0 [kvm] kvm_dev_ioctl+0x43a/0x920 [kvm] __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x31/0x50 entry_SYSCALL_64_after_hwframe+0x44/0xae Modules linked in: kvm_intel kvm irqbypass Fixes: a44a4cc1c969 ("KVM: Don't create VM debugfs files outside of the VM directory") Cc: stable@vger.kernel.org Cc: Marc Zyngier Cc: Oliver Upton Reported-by: syzbot+df6fbbd2ee39f21289ef@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Reviewed-by: Oliver Upton Message-Id: <20220415004622.2207751-1-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- virt/kvm/kvm_main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9134ae252d7c28..9eac68ae291e3d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -934,12 +934,6 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc + kvm_vcpu_stats_header.num_desc; - /* - * Force subsequent debugfs file creations to fail if the VM directory - * is not created. - */ - kvm->debugfs_dentry = ERR_PTR(-ENOENT); - if (!debugfs_initialized()) return 0; @@ -1055,6 +1049,12 @@ static struct kvm *kvm_create_vm(unsigned long type) BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); + /* + * Force subsequent debugfs file creations to fail if the VM directory + * is not created (by kvm_create_vm_debugfs()). + */ + kvm->debugfs_dentry = ERR_PTR(-ENOENT); + if (init_srcu_struct(&kvm->srcu)) goto out_err_no_srcu; if (init_srcu_struct(&kvm->irq_srcu)) From d04b62b64056ce2b5be69bb53d7e0084782fc562 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 14 Jan 2022 14:09:02 -0800 Subject: [PATCH 0392/1056] mm/hwpoison: mf_mutex for soft offline and unpoison [ Upstream commit 91d005479e06392617bacc114509d611b705eaac ] Patch series "mm/hwpoison: fix unpoison_memory()", v4. The main purpose of this series is to sync unpoison code to recent changes around how hwpoison code takes page refcount. Unpoison should work or simply fail (without crash) if impossible. The recent works of keeping hwpoison pages in shmem pagecache introduce a new state of hwpoisoned pages, but unpoison for such pages is not supported yet with this series. It seems that soft-offline and unpoison can be used as general purpose page offline/online mechanism (not in the context of memory error). I think that we need some additional works to realize it because currently soft-offline and unpoison are assumed not to happen so frequently (print out too many messages for aggressive usecases). But anyway this could be another interesting next topic. v1: https://lore.kernel.org/linux-mm/20210614021212.223326-1-nao.horiguchi@gmail.com/ v2: https://lore.kernel.org/linux-mm/20211025230503.2650970-1-naoya.horiguchi@linux.dev/ v3: https://lore.kernel.org/linux-mm/20211105055058.3152564-1-naoya.horiguchi@linux.dev/ This patch (of 3): Originally mf_mutex is introduced to serialize multiple MCE events, but it is not that useful to allow unpoison to run in parallel with memory_failure() and soft offline. So apply mf_mutex to soft offline and unpoison. The memory failure handler and soft offline handler get simpler with this. Link: https://lkml.kernel.org/r/20211115084006.3728254-1-naoya.horiguchi@linux.dev Link: https://lkml.kernel.org/r/20211115084006.3728254-2-naoya.horiguchi@linux.dev Signed-off-by: Naoya Horiguchi Reviewed-by: Yang Shi Cc: "Aneesh Kumar K.V" Cc: David Hildenbrand Cc: Ding Hui Cc: Miaohe Lin Cc: Michal Hocko Cc: Oscar Salvador Cc: Peter Xu Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/memory-failure.c | 62 +++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 44 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c3ceb743693349..e6425d959fa9c2 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1463,14 +1463,6 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) lock_page(head); page_flags = head->flags; - if (!PageHWPoison(head)) { - pr_err("Memory failure: %#lx: just unpoisoned\n", pfn); - num_poisoned_pages_dec(); - unlock_page(head); - put_page(head); - return 0; - } - /* * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so * simply disable it. In order to make it work properly, we need @@ -1584,6 +1576,8 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, return rc; } +static DEFINE_MUTEX(mf_mutex); + /** * memory_failure - Handle memory failure of a page. * @pfn: Page Number of the corrupted page @@ -1610,7 +1604,6 @@ int memory_failure(unsigned long pfn, int flags) int res = 0; unsigned long page_flags; bool retry = true; - static DEFINE_MUTEX(mf_mutex); if (!sysctl_memory_failure_recovery) panic("Memory failure on page %lx", pfn); @@ -1744,16 +1737,6 @@ int memory_failure(unsigned long pfn, int flags) */ page_flags = p->flags; - /* - * unpoison always clear PG_hwpoison inside page lock - */ - if (!PageHWPoison(p)) { - pr_err("Memory failure: %#lx: just unpoisoned\n", pfn); - num_poisoned_pages_dec(); - unlock_page(p); - put_page(p); - goto unlock_mutex; - } if (hwpoison_filter(p)) { if (TestClearPageHWPoison(p)) num_poisoned_pages_dec(); @@ -1934,6 +1917,7 @@ int unpoison_memory(unsigned long pfn) struct page *page; struct page *p; int freeit = 0; + int ret = 0; unsigned long flags = 0; static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -1944,39 +1928,30 @@ int unpoison_memory(unsigned long pfn) p = pfn_to_page(pfn); page = compound_head(p); + mutex_lock(&mf_mutex); + if (!PageHWPoison(p)) { unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n", pfn, &unpoison_rs); - return 0; + goto unlock_mutex; } if (page_count(page) > 1) { unpoison_pr_info("Unpoison: Someone grabs the hwpoison page %#lx\n", pfn, &unpoison_rs); - return 0; + goto unlock_mutex; } if (page_mapped(page)) { unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n", pfn, &unpoison_rs); - return 0; + goto unlock_mutex; } if (page_mapping(page)) { unpoison_pr_info("Unpoison: the hwpoison page has non-NULL mapping %#lx\n", pfn, &unpoison_rs); - return 0; - } - - /* - * unpoison_memory() can encounter thp only when the thp is being - * worked by memory_failure() and the page lock is not held yet. - * In such case, we yield to memory_failure() and make unpoison fail. - */ - if (!PageHuge(page) && PageTransHuge(page)) { - unpoison_pr_info("Unpoison: Memory failure is now running on %#lx\n", - pfn, &unpoison_rs); - return 0; + goto unlock_mutex; } if (!get_hwpoison_page(p, flags)) { @@ -1984,29 +1959,23 @@ int unpoison_memory(unsigned long pfn) num_poisoned_pages_dec(); unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n", pfn, &unpoison_rs); - return 0; + goto unlock_mutex; } - lock_page(page); - /* - * This test is racy because PG_hwpoison is set outside of page lock. - * That's acceptable because that won't trigger kernel panic. Instead, - * the PG_hwpoison page will be caught and isolated on the entrance to - * the free buddy page pool. - */ if (TestClearPageHWPoison(page)) { unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", pfn, &unpoison_rs); num_poisoned_pages_dec(); freeit = 1; } - unlock_page(page); put_page(page); if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1)) put_page(page); - return 0; +unlock_mutex: + mutex_unlock(&mf_mutex); + return ret; } EXPORT_SYMBOL(unpoison_memory); @@ -2187,9 +2156,12 @@ int soft_offline_page(unsigned long pfn, int flags) return -EIO; } + mutex_lock(&mf_mutex); + if (PageHWPoison(page)) { pr_info("%s: %#lx page already poisoned\n", __func__, pfn); put_ref_page(ref_page); + mutex_unlock(&mf_mutex); return 0; } @@ -2208,5 +2180,7 @@ int soft_offline_page(unsigned long pfn, int flags) } } + mutex_unlock(&mf_mutex); + return ret; } From 7a07875fabccc17c4564151c19de2174948d0544 Mon Sep 17 00:00:00 2001 From: luofei Date: Tue, 22 Mar 2022 14:44:38 -0700 Subject: [PATCH 0393/1056] mm/hwpoison: avoid the impact of hwpoison_filter() return value on mce handler [ Upstream commit d1fe111fb62a1cf0446a2919f5effbb33ad0702c ] When the hwpoison page meets the filter conditions, it should not be regarded as successful memory_failure() processing for mce handler, but should return a distinct value, otherwise mce handler regards the error page has been identified and isolated, which may lead to calling set_mce_nospec() to change page attribute, etc. Here memory_failure() return -EOPNOTSUPP to indicate that the error event is filtered, mce handler should not take any action for this situation and hwpoison injector should treat as correct. Link: https://lkml.kernel.org/r/20220223082135.2769649-1-luofei@unicloud.com Signed-off-by: luofei Acked-by: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Miaohe Lin Cc: Naoya Horiguchi Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/mce/core.c | 8 +++++--- drivers/base/memory.c | 2 ++ mm/hwpoison-inject.c | 3 ++- mm/madvise.c | 2 ++ mm/memory-failure.c | 9 +++++++-- 5 files changed, 18 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index e23e74e2f928d2..848cfb013f58c2 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1297,10 +1297,12 @@ static void kill_me_maybe(struct callback_head *cb) /* * -EHWPOISON from memory_failure() means that it already sent SIGBUS - * to the current process with the proper error info, so no need to - * send SIGBUS here again. + * to the current process with the proper error info, + * -EOPNOTSUPP means hwpoison_filter() filtered the error event, + * + * In both cases, no further processing is required. */ - if (ret == -EHWPOISON) + if (ret == -EHWPOISON || ret == -EOPNOTSUPP) return; if (p->mce_vaddr != (void __user *)-1l) { diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c0d501a3a71400..c778d1df745579 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -555,6 +555,8 @@ static ssize_t hard_offline_page_store(struct device *dev, return -EINVAL; pfn >>= PAGE_SHIFT; ret = memory_failure(pfn, 0); + if (ret == -EOPNOTSUPP) + ret = 0; return ret ? ret : count; } diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index aff4d27ec23528..a1d6fc3c78b9c9 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c @@ -48,7 +48,8 @@ static int hwpoison_inject(void *data, u64 val) inject: pr_info("Injecting memory failure at pfn %#lx\n", pfn); - return memory_failure(pfn, 0); + err = memory_failure(pfn, 0); + return (err == -EOPNOTSUPP) ? 0 : err; } static int hwpoison_unpoison(void *data, u64 val) diff --git a/mm/madvise.c b/mm/madvise.c index 8e5ca01a6cc07d..882767d58c2739 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -968,6 +968,8 @@ static int madvise_inject_error(int behavior, pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n", pfn, start); ret = memory_failure(pfn, MF_COUNT_INCREASED); + if (ret == -EOPNOTSUPP) + ret = 0; } if (ret) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index e6425d959fa9c2..5664bafd5e7716 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1444,7 +1444,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) if (TestClearPageHWPoison(head)) num_poisoned_pages_dec(); unlock_page(head); - return 0; + return -EOPNOTSUPP; } unlock_page(head); res = MF_FAILED; @@ -1525,7 +1525,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, goto out; if (hwpoison_filter(page)) { - rc = 0; + rc = -EOPNOTSUPP; goto unlock; } @@ -1594,6 +1594,10 @@ static DEFINE_MUTEX(mf_mutex); * * Must run in process context (e.g. a work queue) with interrupts * enabled and no spinlocks hold. + * + * Return: 0 for successfully handled the memory error, + * -EOPNOTSUPP for memory_filter() filtered the error event, + * < 0(except -EOPNOTSUPP) on failure. */ int memory_failure(unsigned long pfn, int flags) { @@ -1742,6 +1746,7 @@ int memory_failure(unsigned long pfn, int flags) num_poisoned_pages_dec(); unlock_page(p); put_page(p); + res = -EOPNOTSUPP; goto unlock_mutex; } From 5429eb5502fc7e64abdb3a4f6b9f698a8337da5d Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 22 Mar 2022 14:44:44 -0700 Subject: [PATCH 0394/1056] mm/memory-failure.c: fix race with changing page compound again [ Upstream commit 888af2701db79b9b27c7e37f9ede528a5ca53b76 ] Patch series "A few fixup patches for memory failure", v2. This series contains a few patches to fix the race with changing page compound page, make non-LRU movable pages unhandlable and so on. More details can be found in the respective changelogs. There is a race window where we got the compound_head, the hugetlb page could be freed to buddy, or even changed to another compound page just before we try to get hwpoison page. Think about the below race window: CPU 1 CPU 2 memory_failure_hugetlb struct page *head = compound_head(p); hugetlb page might be freed to buddy, or even changed to another compound page. get_hwpoison_page -- page is not what we want now... If this race happens, just bail out. Also MF_MSG_DIFFERENT_PAGE_SIZE is introduced to record this event. [akpm@linux-foundation.org: s@/**@/*@, per Naoya Horiguchi] Link: https://lkml.kernel.org/r/20220312074613.4798-1-linmiaohe@huawei.com Link: https://lkml.kernel.org/r/20220312074613.4798-2-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Acked-by: Naoya Horiguchi Cc: Tony Luck Cc: Borislav Petkov Cc: Mike Kravetz Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/mm.h | 1 + include/ras/ras_event.h | 1 + mm/memory-failure.c | 12 ++++++++++++ 3 files changed, 14 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 85205adcdd0d16..7a80a08eec847c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3167,6 +3167,7 @@ enum mf_action_page_type { MF_MSG_BUDDY_2ND, MF_MSG_DAX, MF_MSG_UNSPLIT_THP, + MF_MSG_DIFFERENT_PAGE_SIZE, MF_MSG_UNKNOWN, }; diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 0bdbc0d17d2fa2..cac13ff1d6eb4e 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -376,6 +376,7 @@ TRACE_EVENT(aer_event, EM ( MF_MSG_BUDDY_2ND, "free buddy page (2nd try)" ) \ EM ( MF_MSG_DAX, "dax page" ) \ EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \ + EM ( MF_MSG_DIFFERENT_PAGE_SIZE, "different page size" ) \ EMe ( MF_MSG_UNKNOWN, "unknown page" ) /* diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 5664bafd5e7716..a4d70c21c1469f 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -741,6 +741,7 @@ static const char * const action_page_types[] = { [MF_MSG_BUDDY_2ND] = "free buddy page (2nd try)", [MF_MSG_DAX] = "dax page", [MF_MSG_UNSPLIT_THP] = "unsplit thp", + [MF_MSG_DIFFERENT_PAGE_SIZE] = "different page size", [MF_MSG_UNKNOWN] = "unknown page", }; @@ -1461,6 +1462,17 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) } lock_page(head); + + /* + * The page could have changed compound pages due to race window. + * If this happens just bail out. + */ + if (!PageHuge(p) || compound_head(p) != head) { + action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED); + res = -EBUSY; + goto out; + } + page_flags = head->flags; /* From 62d1655b922958826b7ec22682c3141746f75064 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 21 Apr 2022 16:35:33 -0700 Subject: [PATCH 0395/1056] mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb() [ Upstream commit 405ce051236cc65b30bbfe490b28ce60ae6aed85 ] There is a race condition between memory_failure_hugetlb() and hugetlb free/demotion, which causes setting PageHWPoison flag on the wrong page. The one simple result is that wrong processes can be killed, but another (more serious) one is that the actual error is left unhandled, so no one prevents later access to it, and that might lead to more serious results like consuming corrupted data. Think about the below race window: CPU 1 CPU 2 memory_failure_hugetlb struct page *head = compound_head(p); hugetlb page might be freed to buddy, or even changed to another compound page. get_hwpoison_page -- page is not what we want now... The current code first does prechecks roughly and then reconfirms after taking refcount, but it's found that it makes code overly complicated, so move the prechecks in a single hugetlb_lock range. A newly introduced function, try_memory_failure_hugetlb(), always takes hugetlb_lock (even for non-hugetlb pages). That can be improved, but memory_failure() is rare in principle, so should not be a big problem. Link: https://lkml.kernel.org/r/20220408135323.1559401-2-naoya.horiguchi@linux.dev Fixes: 761ad8d7c7b5 ("mm: hwpoison: introduce memory_failure_hugetlb()") Signed-off-by: Naoya Horiguchi Reported-by: Mike Kravetz Reviewed-by: Miaohe Lin Reviewed-by: Mike Kravetz Cc: Yang Shi Cc: Dan Carpenter Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/hugetlb.h | 6 ++ include/linux/mm.h | 8 +++ mm/hugetlb.c | 10 +++ mm/memory-failure.c | 137 ++++++++++++++++++++++++++++++---------- 4 files changed, 127 insertions(+), 34 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 1faebe1cd0ed5b..22c1d935e22dd7 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -167,6 +167,7 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); bool isolate_huge_page(struct page *page, struct list_head *list); int get_hwpoison_huge_page(struct page *page, bool *hugetlb); +int get_huge_page_for_hwpoison(unsigned long pfn, int flags); void putback_active_hugepage(struct page *page); void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason); void free_huge_page(struct page *page); @@ -362,6 +363,11 @@ static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb) return 0; } +static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags) +{ + return 0; +} + static inline void putback_active_hugepage(struct page *page) { } diff --git a/include/linux/mm.h b/include/linux/mm.h index 7a80a08eec847c..c5fa46e9c0ca4e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3132,6 +3132,14 @@ extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); +#ifdef CONFIG_MEMORY_FAILURE +extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags); +#else +static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags) +{ + return 0; +} +#endif /* diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e4c717b08cfe0e..eed96302897aeb 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6290,6 +6290,16 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb) return ret; } +int get_huge_page_for_hwpoison(unsigned long pfn, int flags) +{ + int ret; + + spin_lock_irq(&hugetlb_lock); + ret = __get_huge_page_for_hwpoison(pfn, flags); + spin_unlock_irq(&hugetlb_lock); + return ret; +} + void putback_active_hugepage(struct page *page) { spin_lock_irq(&hugetlb_lock); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index a4d70c21c1469f..ecd64b203272b1 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1419,50 +1419,113 @@ static int try_to_split_thp_page(struct page *page, const char *msg) return 0; } -static int memory_failure_hugetlb(unsigned long pfn, int flags) +/* + * Called from hugetlb code with hugetlb_lock held. + * + * Return values: + * 0 - free hugepage + * 1 - in-use hugepage + * 2 - not a hugepage + * -EBUSY - the hugepage is busy (try to retry) + * -EHWPOISON - the hugepage is already hwpoisoned + */ +int __get_huge_page_for_hwpoison(unsigned long pfn, int flags) +{ + struct page *page = pfn_to_page(pfn); + struct page *head = compound_head(page); + int ret = 2; /* fallback to normal page handling */ + bool count_increased = false; + + if (!PageHeadHuge(head)) + goto out; + + if (flags & MF_COUNT_INCREASED) { + ret = 1; + count_increased = true; + } else if (HPageFreed(head) || HPageMigratable(head)) { + ret = get_page_unless_zero(head); + if (ret) + count_increased = true; + } else { + ret = -EBUSY; + goto out; + } + + if (TestSetPageHWPoison(head)) { + ret = -EHWPOISON; + goto out; + } + + return ret; +out: + if (count_increased) + put_page(head); + return ret; +} + +#ifdef CONFIG_HUGETLB_PAGE +/* + * Taking refcount of hugetlb pages needs extra care about race conditions + * with basic operations like hugepage allocation/free/demotion. + * So some of prechecks for hwpoison (pinning, and testing/setting + * PageHWPoison) should be done in single hugetlb_lock range. + */ +static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb) { - struct page *p = pfn_to_page(pfn); - struct page *head = compound_head(p); int res; + struct page *p = pfn_to_page(pfn); + struct page *head; unsigned long page_flags; + bool retry = true; - if (TestSetPageHWPoison(head)) { - pr_err("Memory failure: %#lx: already hardware poisoned\n", - pfn); - res = -EHWPOISON; - if (flags & MF_ACTION_REQUIRED) + *hugetlb = 1; +retry: + res = get_huge_page_for_hwpoison(pfn, flags); + if (res == 2) { /* fallback to normal page handling */ + *hugetlb = 0; + return 0; + } else if (res == -EHWPOISON) { + pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); + if (flags & MF_ACTION_REQUIRED) { + head = compound_head(p); res = kill_accessing_process(current, page_to_pfn(head), flags); + } + return res; + } else if (res == -EBUSY) { + if (retry) { + retry = false; + goto retry; + } + action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); return res; } + head = compound_head(p); + lock_page(head); + + if (hwpoison_filter(p)) { + ClearPageHWPoison(head); + res = -EOPNOTSUPP; + goto out; + } + num_poisoned_pages_inc(); - if (!(flags & MF_COUNT_INCREASED)) { - res = get_hwpoison_page(p, flags); - if (!res) { - lock_page(head); - if (hwpoison_filter(p)) { - if (TestClearPageHWPoison(head)) - num_poisoned_pages_dec(); - unlock_page(head); - return -EOPNOTSUPP; - } - unlock_page(head); - res = MF_FAILED; - if (__page_handle_poison(p)) { - page_ref_inc(p); - res = MF_RECOVERED; - } - action_result(pfn, MF_MSG_FREE_HUGE, res); - return res == MF_RECOVERED ? 0 : -EBUSY; - } else if (res < 0) { - action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); - return -EBUSY; + /* + * Handling free hugepage. The possible race with hugepage allocation + * or demotion can be prevented by PageHWPoison flag. + */ + if (res == 0) { + unlock_page(head); + res = MF_FAILED; + if (__page_handle_poison(p)) { + page_ref_inc(p); + res = MF_RECOVERED; } + action_result(pfn, MF_MSG_FREE_HUGE, res); + return res == MF_RECOVERED ? 0 : -EBUSY; } - lock_page(head); - /* * The page could have changed compound pages due to race window. * If this happens just bail out. @@ -1501,6 +1564,12 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) unlock_page(head); return res; } +#else +static inline int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb) +{ + return 0; +} +#endif static int memory_failure_dev_pagemap(unsigned long pfn, int flags, struct dev_pagemap *pgmap) @@ -1620,6 +1689,7 @@ int memory_failure(unsigned long pfn, int flags) int res = 0; unsigned long page_flags; bool retry = true; + int hugetlb = 0; if (!sysctl_memory_failure_recovery) panic("Memory failure on page %lx", pfn); @@ -1640,10 +1710,9 @@ int memory_failure(unsigned long pfn, int flags) mutex_lock(&mf_mutex); try_again: - if (PageHuge(p)) { - res = memory_failure_hugetlb(pfn, flags); + res = try_memory_failure_hugetlb(pfn, flags, &hugetlb); + if (hugetlb) goto unlock_mutex; - } if (TestSetPageHWPoison(p)) { pr_err("Memory failure: %#lx: already hardware poisoned\n", From b952aa508786ab4b7328972482f40905af856246 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Fri, 22 Apr 2022 00:10:24 -0700 Subject: [PATCH 0396/1056] tty: n_gsm: fix invalid use of MSC in advanced option [ Upstream commit c19ffe00fed6bb423d81406d2a7e5793074c7d83 ] n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to the newer 27.010 here. Chapter 5.4.6.3.7 states that the Modem Status Command (MSC) shall only be used if the basic option was chosen. The current implementation uses MSC frames even if advanced option was chosen to inform the peer about modem line state updates. A standard conform peer may choose to discard these frames in advanced option mode. Furthermore, gsmtty_modem_update() is not part of the 'tty_operations' functions despite its name. Rename gsmtty_modem_update() to gsm_modem_update() to clarify this. Split its function into gsm_modem_upd_via_data() and gsm_modem_upd_via_msc() depending on the encoding and adaption. Introduce gsm_dlci_modem_output() as adaption of gsm_dlci_data_output() to encode and queue empty frames in advanced option mode. Use it in gsm_modem_upd_via_data(). gsm_modem_upd_via_msc() is based on the initial gsmtty_modem_update() function which used only MSC frames to update modem states. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220422071025.5490-2-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/n_gsm.c | 125 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 117 insertions(+), 8 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index c52d5e0d5c6f5e..c8ca00fad8e43d 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -371,7 +371,7 @@ static const u8 gsm_fcs8[256] = { #define GOOD_FCS 0xCF static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len); -static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk); +static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk); /** * gsm_fcs_add - update FCS @@ -928,6 +928,63 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, return size; } +/** + * gsm_dlci_modem_output - try and push modem status out of a DLCI + * @gsm: mux + * @dlci: the DLCI to pull modem status from + * @brk: break signal + * + * Push an empty frame in to the transmit queue to update the modem status + * bits and to transmit an optional break. + * + * Caller must hold the tx_lock of the mux. + */ + +static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci, + u8 brk) +{ + u8 *dp = NULL; + struct gsm_msg *msg; + int size; + + /* for modem bits without break data */ + if (dlci->adaption == 1) { + size = 0; + } else if (dlci->adaption == 2) { + size = 1; + if (brk > 0) + size++; + } else { + pr_err("%s: unsupported adaption %d\n", __func__, + dlci->adaption); + } + + msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype); + if (!msg) { + pr_err("%s: gsm_data_alloc error", __func__); + return -ENOMEM; + } + dp = msg->data; + switch (dlci->adaption) { + case 1: /* Unstructured */ + break; + case 2: /* Unstructured with modem bits. */ + if (brk == 0) { + *dp++ = (gsm_encode_modem(dlci) << 1) | EA; + } else { + *dp++ = gsm_encode_modem(dlci) << 1; + *dp++ = (brk << 4) | 2 | EA; /* Length, Break, EA */ + } + break; + default: + /* Handled above */ + break; + } + + __gsm_data_queue(dlci, msg); + return size; +} + /** * gsm_dlci_data_sweep - look for data to send * @gsm: the GSM mux @@ -1492,7 +1549,7 @@ static void gsm_dlci_open(struct gsm_dlci *dlci) pr_debug("DLCI %d goes open.\n", dlci->addr); /* Send current modem state */ if (dlci->addr) - gsmtty_modem_update(dlci, 0); + gsm_modem_update(dlci, 0); wake_up(&dlci->gsm->event); } @@ -2977,12 +3034,43 @@ static struct tty_ldisc_ops tty_ldisc_packet = { #define TX_SIZE 512 -static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk) +/** + * gsm_modem_upd_via_data - send modem bits via convergence layer + * @dlci: channel + * @brk: break signal + * + * Send an empty frame to signal mobile state changes and to transmit the + * break signal for adaption 2. + */ + +static void gsm_modem_upd_via_data(struct gsm_dlci *dlci, u8 brk) +{ + struct gsm_mux *gsm = dlci->gsm; + unsigned long flags; + + if (dlci->state != DLCI_OPEN || dlci->adaption != 2) + return; + + spin_lock_irqsave(&gsm->tx_lock, flags); + gsm_dlci_modem_output(gsm, dlci, brk); + spin_unlock_irqrestore(&gsm->tx_lock, flags); +} + +/** + * gsm_modem_upd_via_msc - send modem bits via control frame + * @dlci: channel + * @brk: break signal + */ + +static int gsm_modem_upd_via_msc(struct gsm_dlci *dlci, u8 brk) { u8 modembits[3]; struct gsm_control *ctrl; int len = 2; + if (dlci->gsm->encoding != 0) + return 0; + modembits[0] = (dlci->addr << 2) | 2 | EA; /* DLCI, Valid, EA */ if (!brk) { modembits[1] = (gsm_encode_modem(dlci) << 1) | EA; @@ -2997,6 +3085,27 @@ static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk) return gsm_control_wait(dlci->gsm, ctrl); } +/** + * gsm_modem_update - send modem status line state + * @dlci: channel + * @brk: break signal + */ + +static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk) +{ + if (dlci->adaption == 2) { + /* Send convergence layer type 2 empty data frame. */ + gsm_modem_upd_via_data(dlci, brk); + return 0; + } else if (dlci->gsm->encoding == 0) { + /* Send as MSC control message. */ + return gsm_modem_upd_via_msc(dlci, brk); + } + + /* Modem status lines are not supported. */ + return -EPROTONOSUPPORT; +} + static int gsm_carrier_raised(struct tty_port *port) { struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port); @@ -3029,7 +3138,7 @@ static void gsm_dtr_rts(struct tty_port *port, int onoff) modem_tx &= ~(TIOCM_DTR | TIOCM_RTS); if (modem_tx != dlci->modem_tx) { dlci->modem_tx = modem_tx; - gsmtty_modem_update(dlci, 0); + gsm_modem_update(dlci, 0); } } @@ -3218,7 +3327,7 @@ static int gsmtty_tiocmset(struct tty_struct *tty, if (modem_tx != dlci->modem_tx) { dlci->modem_tx = modem_tx; - return gsmtty_modem_update(dlci, 0); + return gsm_modem_update(dlci, 0); } return 0; } @@ -3279,7 +3388,7 @@ static void gsmtty_throttle(struct tty_struct *tty) dlci->modem_tx &= ~TIOCM_RTS; dlci->throttled = true; /* Send an MSC with RTS cleared */ - gsmtty_modem_update(dlci, 0); + gsm_modem_update(dlci, 0); } static void gsmtty_unthrottle(struct tty_struct *tty) @@ -3291,7 +3400,7 @@ static void gsmtty_unthrottle(struct tty_struct *tty) dlci->modem_tx |= TIOCM_RTS; dlci->throttled = false; /* Send an MSC with RTS set */ - gsmtty_modem_update(dlci, 0); + gsm_modem_update(dlci, 0); } static int gsmtty_break_ctl(struct tty_struct *tty, int state) @@ -3309,7 +3418,7 @@ static int gsmtty_break_ctl(struct tty_struct *tty, int state) if (encode > 0x0F) encode = 0x0F; /* Best effort */ } - return gsmtty_modem_update(dlci, encode); + return gsm_modem_update(dlci, encode); } static void gsmtty_cleanup(struct tty_struct *tty) From e58094e2b51699fe1046104f4d2425afc7b31053 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Mon, 25 Apr 2022 03:47:26 -0700 Subject: [PATCH 0397/1056] tty: n_gsm: fix sometimes uninitialized warning in gsm_dlci_modem_output() [ Upstream commit 19317433057dc1f2ca9a975e4e6b547282c2a5ef ] 'size' may be used uninitialized in gsm_dlci_modem_output() if called with an adaption that is neither 1 nor 2. The function is currently only called by gsm_modem_upd_via_data() and only for adaption 2. Properly handle every invalid case by returning -EINVAL to silence the compiler warning and avoid future regressions. Fixes: c19ffe00fed6 ("tty: n_gsm: fix invalid use of MSC in advanced option") Cc: stable@vger.kernel.org Reported-by: kernel test robot Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220425104726.7986-1-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/n_gsm.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index c8ca00fad8e43d..fd4a86111a6e10 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -945,18 +945,21 @@ static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci, { u8 *dp = NULL; struct gsm_msg *msg; - int size; + int size = 0; /* for modem bits without break data */ - if (dlci->adaption == 1) { - size = 0; - } else if (dlci->adaption == 2) { - size = 1; + switch (dlci->adaption) { + case 1: /* Unstructured */ + break; + case 2: /* Unstructured with modem bits. */ + size++; if (brk > 0) size++; - } else { + break; + default: pr_err("%s: unsupported adaption %d\n", __func__, dlci->adaption); + return -EINVAL; } msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype); From 156f2c2378e1cd6760c0486f3e883afe8675d5e2 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 27 Apr 2022 15:23:27 +0200 Subject: [PATCH 0398/1056] serial: 8250_mtk: Make sure to select the right FEATURE_SEL [ Upstream commit 6f81fdded0d024c7d4084d434764f30bca1cd6b1 ] Set the FEATURE_SEL at probe time to make sure that BIT(0) is enabled: this guarantees that when the port is configured as AP UART, the right register layout is interpreted by the UART IP. Signed-off-by: AngeloGioacchino Del Regno Cc: stable Link: https://lore.kernel.org/r/20220427132328.228297-3-angelogioacchino.delregno@collabora.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_mtk.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index de48a58460f47a..de57f47635cdd1 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -57,6 +57,9 @@ #define MTK_UART_XON1 40 /* I/O: Xon character 1 */ #define MTK_UART_XOFF1 42 /* I/O: Xoff character 1 */ +#define MTK_UART_FEATURE_SEL 39 /* Feature Selection register */ +#define MTK_UART_FEAT_NEWRMAP BIT(0) /* Use new register map */ + #ifdef CONFIG_SERIAL_8250_DMA enum dma_rx_status { DMA_RX_START = 0, @@ -572,6 +575,10 @@ static int mtk8250_probe(struct platform_device *pdev) uart.dma = data->dma; #endif + /* Set AP UART new register map */ + writel(MTK_UART_FEAT_NEWRMAP, uart.port.membase + + (MTK_UART_FEATURE_SEL << uart.port.regshift)); + /* Disable Rate Fix function */ writel(0x0, uart.port.membase + (MTK_UART_RATE_FIX << uart.port.regshift)); From 00fa5cbbb6a7a7035e7694a1b516904a3f51c2d4 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Wed, 4 May 2022 10:17:33 +0200 Subject: [PATCH 0399/1056] tty: n_gsm: fix invalid gsmtty_write_room() result [ Upstream commit 9361ebfbb79fd1bc8594a487c01ad52cdaa391ea ] gsmtty_write() does not prevent the user to use the full fifo size of 4096 bytes as allocated in gsm_dlci_alloc(). However, gsmtty_write_room() tries to limit the return value by 'TX_SIZE' and returns a negative value if the fifo has more than 'TX_SIZE' bytes stored. This is obviously wrong as 'TX_SIZE' is defined as 512. Define 'TX_SIZE' to the fifo size and use it accordingly for allocation to keep the current behavior. Return the correct remaining size of the fifo in gsmtty_write_room() via kfifo_avail(). Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220504081733.3494-3-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/n_gsm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index fd4a86111a6e10..4a430f6ca17062 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -137,6 +137,7 @@ struct gsm_dlci { int retries; /* Uplink tty if active */ struct tty_port port; /* The tty bound to this DLCI if there is one */ +#define TX_SIZE 4096 /* Must be power of 2. */ struct kfifo fifo; /* Queue fifo for the DLCI */ int adaption; /* Adaption layer in use */ int prev_adaption; @@ -1758,7 +1759,7 @@ static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr) return NULL; spin_lock_init(&dlci->lock); mutex_init(&dlci->mutex); - if (kfifo_alloc(&dlci->fifo, 4096, GFP_KERNEL) < 0) { + if (kfifo_alloc(&dlci->fifo, TX_SIZE, GFP_KERNEL) < 0) { kfree(dlci); return NULL; } @@ -3035,8 +3036,6 @@ static struct tty_ldisc_ops tty_ldisc_packet = { * Virtual tty side */ -#define TX_SIZE 512 - /** * gsm_modem_upd_via_data - send modem bits via convergence layer * @dlci: channel @@ -3274,7 +3273,7 @@ static unsigned int gsmtty_write_room(struct tty_struct *tty) struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return 0; - return TX_SIZE - kfifo_len(&dlci->fifo); + return kfifo_avail(&dlci->fifo); } static unsigned int gsmtty_chars_in_buffer(struct tty_struct *tty) From 0a9a60dcedaacde4b903337b7445cb431b4dd119 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 16 Feb 2022 23:19:58 -0600 Subject: [PATCH 0400/1056] drm/amd: Refactor `amdgpu_aspm` to be evaluated per device [ Upstream commit 0ab5d711ec74d9e60673900974806b7688857947 ] Evaluating `pcie_aspm_enabled` as part of driver probe has the implication that if one PCIe bridge with an AMD GPU connected doesn't support ASPM then none of them do. This is an invalid assumption as the PCIe core will configure ASPM for individual PCIe bridges. Create a new helper function that can be called by individual dGPUs to react to the `amdgpu_aspm` module parameter without having negative results for other dGPUs on the PCIe bus. Suggested-by: Lijo Lazar Reviewed-by: Lijo Lazar Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 25 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/cik.c | 2 +- drivers/gpu/drm/amd/amdgpu/nv.c | 2 +- drivers/gpu/drm/amd/amdgpu/si.c | 2 +- drivers/gpu/drm/amd/amdgpu/soc15.c | 2 +- drivers/gpu/drm/amd/amdgpu/vi.c | 2 +- .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +- 8 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2eebefd26fa82c..5f95d03fd46a03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1285,6 +1285,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); int amdgpu_device_pci_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); +bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev); void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, u64 num_vis_bytes); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a926b5ebbfdf5e..d1af709cc7dca6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1309,6 +1309,31 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) return true; } +/** + * amdgpu_device_should_use_aspm - check if the device should program ASPM + * + * @adev: amdgpu_device pointer + * + * Confirm whether the module parameter and pcie bridge agree that ASPM should + * be set for this device. + * + * Returns true if it should be used or false if not. + */ +bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) +{ + switch (amdgpu_aspm) { + case -1: + break; + case 0: + return false; + case 1: + return true; + default: + return false; + } + return pcie_aspm_enabled(adev->pdev); +} + /* if we get transitioned to only one device, take VGA back */ /** * amdgpu_device_vga_set_decode - enable/disable vga decode diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index f10ce740a29cce..de6d10390ab2f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1719,7 +1719,7 @@ static void cik_program_aspm(struct amdgpu_device *adev) bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false; bool disable_clkreq = false; - if (amdgpu_aspm == 0) + if (!amdgpu_device_should_use_aspm(adev)) return; if (pci_is_root_bus(adev->pdev->bus)) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 9cbed9a8f1c04b..6e277236b44fba 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -584,7 +584,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device *adev) static void nv_program_aspm(struct amdgpu_device *adev) { - if (!amdgpu_aspm) + if (!amdgpu_device_should_use_aspm(adev)) return; if (!(adev->flags & AMD_IS_APU) && diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index e6d2f74a79765d..7f99e130acd066 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -2453,7 +2453,7 @@ static void si_program_aspm(struct amdgpu_device *adev) bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false; bool disable_clkreq = false; - if (amdgpu_aspm == 0) + if (!amdgpu_device_should_use_aspm(adev)) return; if (adev->flags & AMD_IS_APU) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 6439d5c3d8d8b3..bdb47ae96ce6ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -689,7 +689,7 @@ static void soc15_pcie_gen3_enable(struct amdgpu_device *adev) static void soc15_program_aspm(struct amdgpu_device *adev) { - if (!amdgpu_aspm) + if (!amdgpu_device_should_use_aspm(adev)) return; if (!(adev->flags & AMD_IS_APU) && diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 6645ebbd2696c6..039b90cdc3bcac 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1140,7 +1140,7 @@ static void vi_program_aspm(struct amdgpu_device *adev) bool bL1SS = false; bool bClkReqSupport = true; - if (!amdgpu_aspm) + if (!amdgpu_device_should_use_aspm(adev)) return; if (adev->flags & AMD_IS_APU || diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 574a9d7f7a5e74..918d5c7c2328b6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -338,7 +338,7 @@ sienna_cichlid_get_allowed_feature_mask(struct smu_context *smu, if (smu->dc_controlled_by_gpio) *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_ACDC_BIT); - if (amdgpu_aspm) + if (amdgpu_device_should_use_aspm(adev)) *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_LCLK_BIT); return 0; From 7a9e13b86536ce6dca54380f19d537b1c80caee3 Mon Sep 17 00:00:00 2001 From: Richard Gong Date: Fri, 8 Apr 2022 12:08:38 -0500 Subject: [PATCH 0401/1056] drm/amdgpu: vi: disable ASPM on Intel Alder Lake based systems [ Upstream commit aa482ddca85a3485be0e7b83a0789dc4d987670b ] Active State Power Management (ASPM) feature is enabled since kernel 5.14. There are some AMD Volcanic Islands (VI) GFX cards, such as the WX3200 and RX640, that do not work with ASPM-enabled Intel Alder Lake based systems. Using these GFX cards as video/display output, Intel Alder Lake based systems will freeze after suspend/resume. The issue was originally reported on one system (Dell Precision 3660 with BIOS version 0.14.81), but was later confirmed to affect at least 4 pre-production Alder Lake based systems. Add an extra check to disable ASPM on Intel Alder Lake based systems with the problematic AMD Volcanic Islands GFX cards. Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default") Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885 Signed-off-by: Richard Gong Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 039b90cdc3bcac..45f0188c42739b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -81,6 +81,10 @@ #include "mxgpu_vi.h" #include "amdgpu_dm.h" +#if IS_ENABLED(CONFIG_X86) +#include +#endif + #define ixPCIE_LC_L1_PM_SUBSTATE 0x100100C6 #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK 0x00000001L #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK 0x00000002L @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev) WREG32_PCIE(ixPCIE_LC_CNTL, data); } +static bool aspm_support_quirk_check(void) +{ +#if IS_ENABLED(CONFIG_X86) + struct cpuinfo_x86 *c = &cpu_data(0); + + return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE); +#else + return true; +#endif +} + static void vi_program_aspm(struct amdgpu_device *adev) { u32 data, data1, orig; bool bL1SS = false; bool bClkReqSupport = true; - if (!amdgpu_device_should_use_aspm(adev)) + if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check()) return; if (adev->flags & AMD_IS_APU || From 51a405dea0ae54330b6441c5f7c3bb9ceadedce8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 27 Jan 2022 12:56:22 +0100 Subject: [PATCH 0402/1056] drm/i915: Fix a race between vma / object destruction and unbinding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bc1922e5d349db4be14c55513102c024c2ae8a50 ] The vma destruction code was using an unlocked advisory check for drm_mm_node_allocated() to avoid racing with eviction code unbinding the vma. This is very fragile and prohibits the dereference of non-refcounted pointers of dying vmas after a call to __i915_vma_unbind(). It also prohibits the dereference of vma->obj of refcounted pointers of dying vmas after a call to __i915_vma_unbind(), since even if a refcount is held on the vma, that won't guarantee that its backing object doesn't get destroyed. So introduce an unbind under the vm mutex at object destroy time, removing all weak references of the vma and its object from the object vma list and from the vm bound list. Signed-off-by: Thomas Hellström Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20220127115622.302970-1-thomas.hellstrom@linux.intel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 6fb9afb65034b5..5f48d5ea5c1583 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -224,6 +224,12 @@ void __i915_gem_free_object(struct drm_i915_gem_object *obj) GEM_BUG_ON(vma->obj != obj); spin_unlock(&obj->vma.lock); + /* Verify that the vma is unbound under the vm mutex. */ + mutex_lock(&vma->vm->mutex); + atomic_and(~I915_VMA_PIN_MASK, &vma->flags); + __i915_vma_unbind(vma); + mutex_unlock(&vma->vm->mutex); + __i915_vma_put(vma); spin_lock(&obj->vma.lock); From d953c679022cb2c2cd58ebb5e09a47e4492c3744 Mon Sep 17 00:00:00 2001 From: Chun-Kuang Hu Date: Thu, 28 Oct 2021 18:19:07 +0800 Subject: [PATCH 0403/1056] drm/mediatek: Use mailbox rx_callback instead of cmdq_task_cb [ Upstream commit 1ee07a683b7e4e6ad9ad4f77fce4751741bc8ceb ] rx_callback is a standard mailbox callback mechanism and could cover the function of proprietary cmdq_task_cb, so use the standard one instead of the proprietary one. Signed-off-by: Chun-Kuang Hu Signed-off-by: jason-jh.lin Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index a4e80e4996748d..369d3e68c0b60a 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -4,6 +4,8 @@ */ #include +#include +#include #include #include #include @@ -222,9 +224,11 @@ struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc, } #if IS_REACHABLE(CONFIG_MTK_CMDQ) -static void ddp_cmdq_cb(struct cmdq_cb_data data) +static void ddp_cmdq_cb(struct mbox_client *cl, void *mssg) { - cmdq_pkt_destroy(data.data); + struct cmdq_cb_data *data = mssg; + + cmdq_pkt_destroy(data->pkt); } #endif @@ -475,7 +479,12 @@ static void mtk_drm_crtc_update_config(struct mtk_drm_crtc *mtk_crtc, cmdq_pkt_wfe(cmdq_handle, mtk_crtc->cmdq_event, false); mtk_crtc_ddp_config(crtc, cmdq_handle); cmdq_pkt_finalize(cmdq_handle); - cmdq_pkt_flush_async(cmdq_handle, ddp_cmdq_cb, cmdq_handle); + dma_sync_single_for_device(mtk_crtc->cmdq_client->chan->mbox->dev, + cmdq_handle->pa_base, + cmdq_handle->cmd_buf_size, + DMA_TO_DEVICE); + mbox_send_message(mtk_crtc->cmdq_client->chan, cmdq_handle); + mbox_client_txdone(mtk_crtc->cmdq_client->chan, 0); } #endif mtk_crtc->config_updating = false; @@ -839,6 +848,7 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, } if (mtk_crtc->cmdq_client) { + mtk_crtc->cmdq_client->client.rx_callback = ddp_cmdq_cb; ret = of_property_read_u32_index(priv->mutex_node, "mediatek,gce-events", drm_crtc_index(&mtk_crtc->base), From 6f77386ddb1cca97da95fb116c2975cc529936f5 Mon Sep 17 00:00:00 2001 From: Chun-Kuang Hu Date: Thu, 28 Oct 2021 18:19:08 +0800 Subject: [PATCH 0404/1056] drm/mediatek: Remove the pointer of struct cmdq_client [ Upstream commit 563c9d4a5b117552150efbecbaf0877947e98a32 ] In mailbox rx_callback, it pass struct mbox_client to callback function, but it could not map back to mtk_drm_crtc instance because struct cmdq_client use a pointer to struct mbox_client: struct cmdq_client { struct mbox_client client; struct mbox_chan *chan; }; struct mtk_drm_crtc { /* client instance data */ struct cmdq_client *cmdq_client; }; so remove the pointer of struct cmdq_client and let mtk_drm_crtc instance define cmdq_client as: struct mtk_drm_crtc { /* client instance data */ struct cmdq_client cmdq_client; }; and in rx_callback function, use struct mbox_client to get struct mtk_drm_crtc. Signed-off-by: Chun-Kuang Hu Signed-off-by: jason-jh.lin Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 37 +++++++++++++------------ 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 369d3e68c0b60a..e23e3224ac67eb 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -52,7 +52,7 @@ struct mtk_drm_crtc { bool pending_async_planes; #if IS_REACHABLE(CONFIG_MTK_CMDQ) - struct cmdq_client *cmdq_client; + struct cmdq_client cmdq_client; u32 cmdq_event; #endif @@ -472,19 +472,19 @@ static void mtk_drm_crtc_update_config(struct mtk_drm_crtc *mtk_crtc, mtk_mutex_release(mtk_crtc->mutex); } #if IS_REACHABLE(CONFIG_MTK_CMDQ) - if (mtk_crtc->cmdq_client) { - mbox_flush(mtk_crtc->cmdq_client->chan, 2000); - cmdq_handle = cmdq_pkt_create(mtk_crtc->cmdq_client, PAGE_SIZE); + if (mtk_crtc->cmdq_client.chan) { + mbox_flush(mtk_crtc->cmdq_client.chan, 2000); + cmdq_handle = cmdq_pkt_create(&mtk_crtc->cmdq_client, PAGE_SIZE); cmdq_pkt_clear_event(cmdq_handle, mtk_crtc->cmdq_event); cmdq_pkt_wfe(cmdq_handle, mtk_crtc->cmdq_event, false); mtk_crtc_ddp_config(crtc, cmdq_handle); cmdq_pkt_finalize(cmdq_handle); - dma_sync_single_for_device(mtk_crtc->cmdq_client->chan->mbox->dev, + dma_sync_single_for_device(mtk_crtc->cmdq_client.chan->mbox->dev, cmdq_handle->pa_base, cmdq_handle->cmd_buf_size, DMA_TO_DEVICE); - mbox_send_message(mtk_crtc->cmdq_client->chan, cmdq_handle); - mbox_client_txdone(mtk_crtc->cmdq_client->chan, 0); + mbox_send_message(mtk_crtc->cmdq_client.chan, cmdq_handle); + mbox_client_txdone(mtk_crtc->cmdq_client.chan, 0); } #endif mtk_crtc->config_updating = false; @@ -498,7 +498,7 @@ static void mtk_crtc_ddp_irq(void *data) struct mtk_drm_private *priv = crtc->dev->dev_private; #if IS_REACHABLE(CONFIG_MTK_CMDQ) - if (!priv->data->shadow_register && !mtk_crtc->cmdq_client) + if (!priv->data->shadow_register && !mtk_crtc->cmdq_client.chan) #else if (!priv->data->shadow_register) #endif @@ -838,17 +838,20 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, mutex_init(&mtk_crtc->hw_lock); #if IS_REACHABLE(CONFIG_MTK_CMDQ) - mtk_crtc->cmdq_client = - cmdq_mbox_create(mtk_crtc->mmsys_dev, - drm_crtc_index(&mtk_crtc->base)); - if (IS_ERR(mtk_crtc->cmdq_client)) { + mtk_crtc->cmdq_client.client.dev = mtk_crtc->mmsys_dev; + mtk_crtc->cmdq_client.client.tx_block = false; + mtk_crtc->cmdq_client.client.knows_txdone = true; + mtk_crtc->cmdq_client.client.rx_callback = ddp_cmdq_cb; + mtk_crtc->cmdq_client.chan = + mbox_request_channel(&mtk_crtc->cmdq_client.client, + drm_crtc_index(&mtk_crtc->base)); + if (IS_ERR(mtk_crtc->cmdq_client.chan)) { dev_dbg(dev, "mtk_crtc %d failed to create mailbox client, writing register by CPU now\n", drm_crtc_index(&mtk_crtc->base)); - mtk_crtc->cmdq_client = NULL; + mtk_crtc->cmdq_client.chan = NULL; } - if (mtk_crtc->cmdq_client) { - mtk_crtc->cmdq_client->client.rx_callback = ddp_cmdq_cb; + if (mtk_crtc->cmdq_client.chan) { ret = of_property_read_u32_index(priv->mutex_node, "mediatek,gce-events", drm_crtc_index(&mtk_crtc->base), @@ -856,8 +859,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, if (ret) { dev_dbg(dev, "mtk_crtc %d failed to get mediatek,gce-events property\n", drm_crtc_index(&mtk_crtc->base)); - cmdq_mbox_destroy(mtk_crtc->cmdq_client); - mtk_crtc->cmdq_client = NULL; + mbox_free_channel(mtk_crtc->cmdq_client.chan); + mtk_crtc->cmdq_client.chan = NULL; } } #endif From d3f15355704510e22fa395b4030bcd50e6b99969 Mon Sep 17 00:00:00 2001 From: Chun-Kuang Hu Date: Wed, 27 Oct 2021 10:18:54 +0800 Subject: [PATCH 0405/1056] drm/mediatek: Detect CMDQ execution timeout [ Upstream commit eaf80126aba6fd1754837eec91e4c8bbd58ae52e ] CMDQ is used to update display register in vblank period, so it should be execute in next 2 vblank. One vblank interrupt before send message (occasionally) and one vblank interrupt after cmdq done. If it fail to execute in next 3 vblank, tiemout happen. Signed-off-by: Chun-Kuang Hu Signed-off-by: jason-jh.lin Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index e23e3224ac67eb..dad1f85ee31545 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -54,6 +54,7 @@ struct mtk_drm_crtc { #if IS_REACHABLE(CONFIG_MTK_CMDQ) struct cmdq_client cmdq_client; u32 cmdq_event; + u32 cmdq_vblank_cnt; #endif struct device *mmsys_dev; @@ -227,7 +228,10 @@ struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc, static void ddp_cmdq_cb(struct mbox_client *cl, void *mssg) { struct cmdq_cb_data *data = mssg; + struct cmdq_client *cmdq_cl = container_of(cl, struct cmdq_client, client); + struct mtk_drm_crtc *mtk_crtc = container_of(cmdq_cl, struct mtk_drm_crtc, cmdq_client); + mtk_crtc->cmdq_vblank_cnt = 0; cmdq_pkt_destroy(data->pkt); } #endif @@ -483,6 +487,15 @@ static void mtk_drm_crtc_update_config(struct mtk_drm_crtc *mtk_crtc, cmdq_handle->pa_base, cmdq_handle->cmd_buf_size, DMA_TO_DEVICE); + /* + * CMDQ command should execute in next 3 vblank. + * One vblank interrupt before send message (occasionally) + * and one vblank interrupt after cmdq done, + * so it's timeout after 3 vblank interrupt. + * If it fail to execute in next 3 vblank, timeout happen. + */ + mtk_crtc->cmdq_vblank_cnt = 3; + mbox_send_message(mtk_crtc->cmdq_client.chan, cmdq_handle); mbox_client_txdone(mtk_crtc->cmdq_client.chan, 0); } @@ -499,11 +512,14 @@ static void mtk_crtc_ddp_irq(void *data) #if IS_REACHABLE(CONFIG_MTK_CMDQ) if (!priv->data->shadow_register && !mtk_crtc->cmdq_client.chan) + mtk_crtc_ddp_config(crtc, NULL); + else if (mtk_crtc->cmdq_vblank_cnt > 0 && --mtk_crtc->cmdq_vblank_cnt == 0) + DRM_ERROR("mtk_crtc %d CMDQ execute command timeout!\n", + drm_crtc_index(&mtk_crtc->base)); #else if (!priv->data->shadow_register) -#endif mtk_crtc_ddp_config(crtc, NULL); - +#endif mtk_drm_finish_page_flip(mtk_crtc); } From 2c4396693698e876e559768d3d3a150c672ec384 Mon Sep 17 00:00:00 2001 From: Chun-Kuang Hu Date: Thu, 28 Oct 2021 18:19:10 +0800 Subject: [PATCH 0406/1056] drm/mediatek: Add cmdq_handle in mtk_crtc [ Upstream commit 7627122fd1c06800a1fe624e9fb3c269796115e8 ] One mtk_crtc need just one cmdq_handle, so add one cmdq_handle in mtk_crtc to prevent frequently allocation and free of cmdq_handle. Signed-off-by: Chun-Kuang Hu Signed-off-by: jason-jh.lin Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 62 +++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index dad1f85ee31545..ffa54b416ca76a 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -53,6 +53,7 @@ struct mtk_drm_crtc { #if IS_REACHABLE(CONFIG_MTK_CMDQ) struct cmdq_client cmdq_client; + struct cmdq_pkt cmdq_handle; u32 cmdq_event; u32 cmdq_vblank_cnt; #endif @@ -107,12 +108,55 @@ static void mtk_drm_finish_page_flip(struct mtk_drm_crtc *mtk_crtc) } } +#if IS_REACHABLE(CONFIG_MTK_CMDQ) +static int mtk_drm_cmdq_pkt_create(struct cmdq_client *client, struct cmdq_pkt *pkt, + size_t size) +{ + struct device *dev; + dma_addr_t dma_addr; + + pkt->va_base = kzalloc(size, GFP_KERNEL); + if (!pkt->va_base) { + kfree(pkt); + return -ENOMEM; + } + pkt->buf_size = size; + pkt->cl = (void *)client; + + dev = client->chan->mbox->dev; + dma_addr = dma_map_single(dev, pkt->va_base, pkt->buf_size, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + dev_err(dev, "dma map failed, size=%u\n", (u32)(u64)size); + kfree(pkt->va_base); + kfree(pkt); + return -ENOMEM; + } + + pkt->pa_base = dma_addr; + + return 0; +} + +static void mtk_drm_cmdq_pkt_destroy(struct cmdq_pkt *pkt) +{ + struct cmdq_client *client = (struct cmdq_client *)pkt->cl; + + dma_unmap_single(client->chan->mbox->dev, pkt->pa_base, pkt->buf_size, + DMA_TO_DEVICE); + kfree(pkt->va_base); + kfree(pkt); +} +#endif + static void mtk_drm_crtc_destroy(struct drm_crtc *crtc) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); mtk_mutex_put(mtk_crtc->mutex); - +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + mtk_drm_cmdq_pkt_destroy(&mtk_crtc->cmdq_handle); +#endif drm_crtc_cleanup(crtc); } @@ -227,12 +271,10 @@ struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc, #if IS_REACHABLE(CONFIG_MTK_CMDQ) static void ddp_cmdq_cb(struct mbox_client *cl, void *mssg) { - struct cmdq_cb_data *data = mssg; struct cmdq_client *cmdq_cl = container_of(cl, struct cmdq_client, client); struct mtk_drm_crtc *mtk_crtc = container_of(cmdq_cl, struct mtk_drm_crtc, cmdq_client); mtk_crtc->cmdq_vblank_cnt = 0; - cmdq_pkt_destroy(data->pkt); } #endif @@ -438,7 +480,7 @@ static void mtk_drm_crtc_update_config(struct mtk_drm_crtc *mtk_crtc, bool needs_vblank) { #if IS_REACHABLE(CONFIG_MTK_CMDQ) - struct cmdq_pkt *cmdq_handle; + struct cmdq_pkt *cmdq_handle = &mtk_crtc->cmdq_handle; #endif struct drm_crtc *crtc = &mtk_crtc->base; struct mtk_drm_private *priv = crtc->dev->dev_private; @@ -478,7 +520,7 @@ static void mtk_drm_crtc_update_config(struct mtk_drm_crtc *mtk_crtc, #if IS_REACHABLE(CONFIG_MTK_CMDQ) if (mtk_crtc->cmdq_client.chan) { mbox_flush(mtk_crtc->cmdq_client.chan, 2000); - cmdq_handle = cmdq_pkt_create(&mtk_crtc->cmdq_client, PAGE_SIZE); + cmdq_handle->cmd_buf_size = 0; cmdq_pkt_clear_event(cmdq_handle, mtk_crtc->cmdq_event); cmdq_pkt_wfe(cmdq_handle, mtk_crtc->cmdq_event, false); mtk_crtc_ddp_config(crtc, cmdq_handle); @@ -877,6 +919,16 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, drm_crtc_index(&mtk_crtc->base)); mbox_free_channel(mtk_crtc->cmdq_client.chan); mtk_crtc->cmdq_client.chan = NULL; + } else { + ret = mtk_drm_cmdq_pkt_create(&mtk_crtc->cmdq_client, + &mtk_crtc->cmdq_handle, + PAGE_SIZE); + if (ret) { + dev_dbg(dev, "mtk_crtc %d failed to create cmdq packet\n", + drm_crtc_index(&mtk_crtc->base)); + mbox_free_channel(mtk_crtc->cmdq_client.chan); + mtk_crtc->cmdq_client.chan = NULL; + } } } #endif From 8a2dbdeccef6de47565638abdf3c25f41cdffc37 Mon Sep 17 00:00:00 2001 From: Rex-BC Chen Date: Mon, 21 Mar 2022 15:23:20 +0800 Subject: [PATCH 0407/1056] drm/mediatek: Add vblank register/unregister callback functions [ Upstream commit b74d921b900b6ce38c6247c0a1c86be9f3746493 ] We encountered a kernel panic issue that callback data will be NULL when it's using in ovl irq handler. There is a timing issue between mtk_disp_ovl_irq_handler() and mtk_ovl_disable_vblank(). To resolve this issue, we use the flow to register/unregister vblank cb: - Register callback function and callback data when crtc creates. - Unregister callback function and callback data when crtc destroies. With this solution, we can assure callback data will not be NULL when vblank is disable. Link: https://patchwork.kernel.org/project/linux-mediatek/patch/20220321072320.15019-1-rex-bc.chen@mediatek.com/ Fixes: 9b0704988b15 ("drm/mediatek: Register vblank callback function") Signed-off-by: Rex-BC Chen Reviewed-by: jason-jh.lin Signed-off-by: Chun-Kuang Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_disp_drv.h | 16 +++++++----- drivers/gpu/drm/mediatek/mtk_disp_ovl.c | 22 ++++++++++++---- drivers/gpu/drm/mediatek/mtk_disp_rdma.c | 20 +++++++++----- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 14 +++++++++- drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c | 4 +++ drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h | 29 ++++++++++++++++----- 6 files changed, 80 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h b/drivers/gpu/drm/mediatek/mtk_disp_drv.h index 86c3068894b116..974462831133b4 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h +++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h @@ -76,9 +76,11 @@ void mtk_ovl_layer_off(struct device *dev, unsigned int idx, void mtk_ovl_start(struct device *dev); void mtk_ovl_stop(struct device *dev); unsigned int mtk_ovl_supported_rotations(struct device *dev); -void mtk_ovl_enable_vblank(struct device *dev, - void (*vblank_cb)(void *), - void *vblank_cb_data); +void mtk_ovl_register_vblank_cb(struct device *dev, + void (*vblank_cb)(void *), + void *vblank_cb_data); +void mtk_ovl_unregister_vblank_cb(struct device *dev); +void mtk_ovl_enable_vblank(struct device *dev); void mtk_ovl_disable_vblank(struct device *dev); void mtk_rdma_bypass_shadow(struct device *dev); @@ -93,9 +95,11 @@ void mtk_rdma_layer_config(struct device *dev, unsigned int idx, struct cmdq_pkt *cmdq_pkt); void mtk_rdma_start(struct device *dev); void mtk_rdma_stop(struct device *dev); -void mtk_rdma_enable_vblank(struct device *dev, - void (*vblank_cb)(void *), - void *vblank_cb_data); +void mtk_rdma_register_vblank_cb(struct device *dev, + void (*vblank_cb)(void *), + void *vblank_cb_data); +void mtk_rdma_unregister_vblank_cb(struct device *dev); +void mtk_rdma_enable_vblank(struct device *dev); void mtk_rdma_disable_vblank(struct device *dev); #endif diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index 5326989d520615..411cf0f2166118 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -96,14 +96,28 @@ static irqreturn_t mtk_disp_ovl_irq_handler(int irq, void *dev_id) return IRQ_HANDLED; } -void mtk_ovl_enable_vblank(struct device *dev, - void (*vblank_cb)(void *), - void *vblank_cb_data) +void mtk_ovl_register_vblank_cb(struct device *dev, + void (*vblank_cb)(void *), + void *vblank_cb_data) { struct mtk_disp_ovl *ovl = dev_get_drvdata(dev); ovl->vblank_cb = vblank_cb; ovl->vblank_cb_data = vblank_cb_data; +} + +void mtk_ovl_unregister_vblank_cb(struct device *dev) +{ + struct mtk_disp_ovl *ovl = dev_get_drvdata(dev); + + ovl->vblank_cb = NULL; + ovl->vblank_cb_data = NULL; +} + +void mtk_ovl_enable_vblank(struct device *dev) +{ + struct mtk_disp_ovl *ovl = dev_get_drvdata(dev); + writel(0x0, ovl->regs + DISP_REG_OVL_INTSTA); writel_relaxed(OVL_FME_CPL_INT, ovl->regs + DISP_REG_OVL_INTEN); } @@ -112,8 +126,6 @@ void mtk_ovl_disable_vblank(struct device *dev) { struct mtk_disp_ovl *ovl = dev_get_drvdata(dev); - ovl->vblank_cb = NULL; - ovl->vblank_cb_data = NULL; writel_relaxed(0x0, ovl->regs + DISP_REG_OVL_INTEN); } diff --git a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c index 75d7f45579e266..a6a6cb5f75af76 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c @@ -94,24 +94,32 @@ static void rdma_update_bits(struct device *dev, unsigned int reg, writel(tmp, rdma->regs + reg); } -void mtk_rdma_enable_vblank(struct device *dev, - void (*vblank_cb)(void *), - void *vblank_cb_data) +void mtk_rdma_register_vblank_cb(struct device *dev, + void (*vblank_cb)(void *), + void *vblank_cb_data) { struct mtk_disp_rdma *rdma = dev_get_drvdata(dev); rdma->vblank_cb = vblank_cb; rdma->vblank_cb_data = vblank_cb_data; - rdma_update_bits(dev, DISP_REG_RDMA_INT_ENABLE, RDMA_FRAME_END_INT, - RDMA_FRAME_END_INT); } -void mtk_rdma_disable_vblank(struct device *dev) +void mtk_rdma_unregister_vblank_cb(struct device *dev) { struct mtk_disp_rdma *rdma = dev_get_drvdata(dev); rdma->vblank_cb = NULL; rdma->vblank_cb_data = NULL; +} + +void mtk_rdma_enable_vblank(struct device *dev) +{ + rdma_update_bits(dev, DISP_REG_RDMA_INT_ENABLE, RDMA_FRAME_END_INT, + RDMA_FRAME_END_INT); +} + +void mtk_rdma_disable_vblank(struct device *dev) +{ rdma_update_bits(dev, DISP_REG_RDMA_INT_ENABLE, RDMA_FRAME_END_INT, 0); } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index ffa54b416ca76a..34bb6c713a908c 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -152,11 +152,20 @@ static void mtk_drm_cmdq_pkt_destroy(struct cmdq_pkt *pkt) static void mtk_drm_crtc_destroy(struct drm_crtc *crtc) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); + int i; mtk_mutex_put(mtk_crtc->mutex); #if IS_REACHABLE(CONFIG_MTK_CMDQ) mtk_drm_cmdq_pkt_destroy(&mtk_crtc->cmdq_handle); #endif + + for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) { + struct mtk_ddp_comp *comp; + + comp = mtk_crtc->ddp_comp[i]; + mtk_ddp_comp_unregister_vblank_cb(comp); + } + drm_crtc_cleanup(crtc); } @@ -570,7 +579,7 @@ static int mtk_drm_crtc_enable_vblank(struct drm_crtc *crtc) struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; - mtk_ddp_comp_enable_vblank(comp, mtk_crtc_ddp_irq, &mtk_crtc->base); + mtk_ddp_comp_enable_vblank(comp); return 0; } @@ -870,6 +879,9 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, if (comp->funcs->ctm_set) has_ctm = true; } + + mtk_ddp_comp_register_vblank_cb(comp, mtk_crtc_ddp_irq, + &mtk_crtc->base); } for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index 99cbf44463e402..22d23668b48407 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -276,6 +276,8 @@ static const struct mtk_ddp_comp_funcs ddp_ovl = { .config = mtk_ovl_config, .start = mtk_ovl_start, .stop = mtk_ovl_stop, + .register_vblank_cb = mtk_ovl_register_vblank_cb, + .unregister_vblank_cb = mtk_ovl_unregister_vblank_cb, .enable_vblank = mtk_ovl_enable_vblank, .disable_vblank = mtk_ovl_disable_vblank, .supported_rotations = mtk_ovl_supported_rotations, @@ -292,6 +294,8 @@ static const struct mtk_ddp_comp_funcs ddp_rdma = { .config = mtk_rdma_config, .start = mtk_rdma_start, .stop = mtk_rdma_stop, + .register_vblank_cb = mtk_rdma_register_vblank_cb, + .unregister_vblank_cb = mtk_rdma_unregister_vblank_cb, .enable_vblank = mtk_rdma_enable_vblank, .disable_vblank = mtk_rdma_disable_vblank, .layer_nr = mtk_rdma_layer_nr, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h index bb914d976cf5d3..25cb50f2391fa6 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h @@ -47,9 +47,11 @@ struct mtk_ddp_comp_funcs { unsigned int bpc, struct cmdq_pkt *cmdq_pkt); void (*start)(struct device *dev); void (*stop)(struct device *dev); - void (*enable_vblank)(struct device *dev, - void (*vblank_cb)(void *), - void *vblank_cb_data); + void (*register_vblank_cb)(struct device *dev, + void (*vblank_cb)(void *), + void *vblank_cb_data); + void (*unregister_vblank_cb)(struct device *dev); + void (*enable_vblank)(struct device *dev); void (*disable_vblank)(struct device *dev); unsigned int (*supported_rotations)(struct device *dev); unsigned int (*layer_nr)(struct device *dev); @@ -110,12 +112,25 @@ static inline void mtk_ddp_comp_stop(struct mtk_ddp_comp *comp) comp->funcs->stop(comp->dev); } -static inline void mtk_ddp_comp_enable_vblank(struct mtk_ddp_comp *comp, - void (*vblank_cb)(void *), - void *vblank_cb_data) +static inline void mtk_ddp_comp_register_vblank_cb(struct mtk_ddp_comp *comp, + void (*vblank_cb)(void *), + void *vblank_cb_data) +{ + if (comp->funcs && comp->funcs->register_vblank_cb) + comp->funcs->register_vblank_cb(comp->dev, vblank_cb, + vblank_cb_data); +} + +static inline void mtk_ddp_comp_unregister_vblank_cb(struct mtk_ddp_comp *comp) +{ + if (comp->funcs && comp->funcs->unregister_vblank_cb) + comp->funcs->unregister_vblank_cb(comp->dev); +} + +static inline void mtk_ddp_comp_enable_vblank(struct mtk_ddp_comp *comp) { if (comp->funcs && comp->funcs->enable_vblank) - comp->funcs->enable_vblank(comp->dev, vblank_cb, vblank_cb_data); + comp->funcs->enable_vblank(comp->dev); } static inline void mtk_ddp_comp_disable_vblank(struct mtk_ddp_comp *comp) From 5781bb8a31910c2621cddeed739b99bfb0ddf1aa Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Sat, 23 Apr 2022 00:31:17 +0200 Subject: [PATCH 0408/1056] Bluetooth: protect le accept and resolv lists with hdev->lock [ Upstream commit 5e2b6064cbc5fd582396768c5f9583f65085e368 ] Concurrent operations from events on le_{accept,resolv}_list are currently unprotected by hdev->lock. Most existing code do already protect the lists with that lock. This can be observed in hci_debugfs and hci_sync. Add the protection for these events too. Fixes: b950aa88638c ("Bluetooth: Add definitions and track LE resolve list modification") Fixes: 0f36b589e4ee ("Bluetooth: Track LE white list modification via HCI commands") Signed-off-by: Niels Dossche Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin --- net/bluetooth/hci_event.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 5ac3aca6deeb33..2337e9275863ea 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1559,7 +1559,9 @@ static void hci_cc_le_clear_accept_list(struct hci_dev *hdev, if (status) return; + hci_dev_lock(hdev); hci_bdaddr_list_clear(&hdev->le_accept_list); + hci_dev_unlock(hdev); } static void hci_cc_le_add_to_accept_list(struct hci_dev *hdev, @@ -1577,8 +1579,10 @@ static void hci_cc_le_add_to_accept_list(struct hci_dev *hdev, if (!sent) return; + hci_dev_lock(hdev); hci_bdaddr_list_add(&hdev->le_accept_list, &sent->bdaddr, sent->bdaddr_type); + hci_dev_unlock(hdev); } static void hci_cc_le_del_from_accept_list(struct hci_dev *hdev, @@ -1596,8 +1600,10 @@ static void hci_cc_le_del_from_accept_list(struct hci_dev *hdev, if (!sent) return; + hci_dev_lock(hdev); hci_bdaddr_list_del(&hdev->le_accept_list, &sent->bdaddr, sent->bdaddr_type); + hci_dev_unlock(hdev); } static void hci_cc_le_read_supported_states(struct hci_dev *hdev, @@ -1661,9 +1667,11 @@ static void hci_cc_le_add_to_resolv_list(struct hci_dev *hdev, if (!sent) return; + hci_dev_lock(hdev); hci_bdaddr_list_add_with_irk(&hdev->le_resolv_list, &sent->bdaddr, sent->bdaddr_type, sent->peer_irk, sent->local_irk); + hci_dev_unlock(hdev); } static void hci_cc_le_del_from_resolv_list(struct hci_dev *hdev, @@ -1681,8 +1689,10 @@ static void hci_cc_le_del_from_resolv_list(struct hci_dev *hdev, if (!sent) return; + hci_dev_lock(hdev); hci_bdaddr_list_del_with_irk(&hdev->le_resolv_list, &sent->bdaddr, sent->bdaddr_type); + hci_dev_unlock(hdev); } static void hci_cc_le_clear_resolv_list(struct hci_dev *hdev, @@ -1695,7 +1705,9 @@ static void hci_cc_le_clear_resolv_list(struct hci_dev *hdev, if (status) return; + hci_dev_lock(hdev); hci_bdaddr_list_clear(&hdev->le_resolv_list); + hci_dev_unlock(hdev); } static void hci_cc_le_read_resolv_list_size(struct hci_dev *hdev, From b3cec8a42fcd11d05313c724f27e01b1db77522c Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 12 May 2022 06:22:15 +0800 Subject: [PATCH 0409/1056] Bluetooth: btmtksdio: fix use-after-free at btmtksdio_recv_event [ Upstream commit 0fab6361c4ba17d1b43a991bef4238a3c1754d35 ] We should not access skb buffer data anymore after hci_recv_frame was called. [ 39.634809] BUG: KASAN: use-after-free in btmtksdio_recv_event+0x1b0 [ 39.634855] Read of size 1 at addr ffffff80cf28a60d by task kworker [ 39.634962] Call trace: [ 39.634974] dump_backtrace+0x0/0x3b8 [ 39.634999] show_stack+0x20/0x2c [ 39.635016] dump_stack_lvl+0x60/0x78 [ 39.635040] print_address_description+0x70/0x2f0 [ 39.635062] kasan_report+0x154/0x194 [ 39.635079] __asan_report_load1_noabort+0x44/0x50 [ 39.635099] btmtksdio_recv_event+0x1b0/0x1c4 [ 39.635129] btmtksdio_txrx_work+0x6cc/0xac4 [ 39.635157] process_one_work+0x560/0xc5c [ 39.635177] worker_thread+0x7ec/0xcc0 [ 39.635195] kthread+0x2d0/0x3d0 [ 39.635215] ret_from_fork+0x10/0x20 [ 39.635247] Allocated by task 0: [ 39.635260] (stack is not available) [ 39.635281] Freed by task 2392: [ 39.635295] kasan_save_stack+0x38/0x68 [ 39.635319] kasan_set_track+0x28/0x3c [ 39.635338] kasan_set_free_info+0x28/0x4c [ 39.635357] ____kasan_slab_free+0x104/0x150 [ 39.635374] __kasan_slab_free+0x18/0x28 [ 39.635391] slab_free_freelist_hook+0x114/0x248 [ 39.635410] kfree+0xf8/0x2b4 [ 39.635427] skb_free_head+0x58/0x98 [ 39.635447] skb_release_data+0x2f4/0x410 [ 39.635464] skb_release_all+0x50/0x60 [ 39.635481] kfree_skb+0xc8/0x25c [ 39.635498] hci_event_packet+0x894/0xca4 [bluetooth] [ 39.635721] hci_rx_work+0x1c8/0x68c [bluetooth] [ 39.635925] process_one_work+0x560/0xc5c [ 39.635951] worker_thread+0x7ec/0xcc0 [ 39.635970] kthread+0x2d0/0x3d0 [ 39.635990] ret_from_fork+0x10/0x20 [ 39.636021] The buggy address belongs to the object at ffffff80cf28a600 which belongs to the cache kmalloc-512 of size 512 [ 39.636039] The buggy address is located 13 bytes inside of 512-byte region [ffffff80cf28a600, ffffff80cf28a800) Fixes: 9aebfd4a2200 ("Bluetooth: mediatek: add support for MediaTek MT7663S and MT7668S SDIO devices") Co-developed-by: Yake Yang Signed-off-by: Yake Yang Signed-off-by: Sean Wang Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin --- drivers/bluetooth/btmtksdio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index ff1f5dfbb6dbf1..d66e4df171d206 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -331,6 +331,7 @@ static int btmtksdio_recv_event(struct hci_dev *hdev, struct sk_buff *skb) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); struct hci_event_hdr *hdr = (void *)skb->data; + u8 evt = hdr->evt; int err; /* Fix up the vendor event id with 0xff for vendor specific instead @@ -355,7 +356,7 @@ static int btmtksdio_recv_event(struct hci_dev *hdev, struct sk_buff *skb) if (err < 0) goto err_free_skb; - if (hdr->evt == HCI_EV_VENDOR) { + if (evt == HCI_EV_VENDOR) { if (test_and_clear_bit(BTMTKSDIO_TX_WAIT_VND_EVT, &bdev->tx_state)) { /* Barrier to sync with other CPUs */ From 51ebf1b6a077fb2c54eae28ff765c299833193ee Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 13 May 2022 11:24:56 +0100 Subject: [PATCH 0410/1056] io_uring: avoid io-wq -EAGAIN looping for !IOPOLL [ Upstream commit e0deb6a025ae8c850dc8685be39fb27b06c88736 ] If an opcode handler semi-reliably returns -EAGAIN, io_wq_submit_work() might continue busily hammer the same handler over and over again, which is not ideal. The -EAGAIN handling in question was put there only for IOPOLL, so restrict it to IOPOLL mode only where there is no other recourse than to retry as we cannot wait. Fixes: def596e9557c9 ("io_uring: support for IO polling") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/f168b4f24181942f3614dd8ff648221736f572e6.1652433740.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0c5dcda0b62215..9bff14c5e2b265 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6866,7 +6866,7 @@ static void io_wq_submit_work(struct io_wq_work *work) * forcing a sync submission from here, since we can't * wait for request slots on the block side. */ - if (ret != -EAGAIN) + if (ret != -EAGAIN || !(req->ctx->flags & IORING_SETUP_IOPOLL)) break; cond_resched(); } while (1); From b82dfacba5762da1c697a180d579624fc85a5e15 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 13 May 2022 14:30:36 +0100 Subject: [PATCH 0411/1056] irqchip/gic-v3: Ensure pseudo-NMIs have an ISB between ack and handling [ Upstream commit adf14453d2c037ab529040c1186ea32e277e783a ] There are cases where a context synchronization event is necessary between an IRQ being raised and being handled, and there are races such that we cannot rely upon the exception entry being subsequent to the interrupt being raised. We identified and fixes this for regular IRQs in commit: 39a06b67c2c1256b ("irqchip/gic: Ensure we have an ISB between ack and ->handle_irq") Unfortunately, we forgot to do the same for psuedo-NMIs when support for those was added in commit: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs") Which means that when pseudo-NMIs are used for PMU support, we'll hit the same problem. Apply the same fix as for regular IRQs. Note that when EOI mode 1 is in use, the call to gic_write_eoir() will provide an ISB. Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs") Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220513133038.226182-2-mark.rutland@arm.com Signed-off-by: Sasha Levin --- drivers/irqchip/irq-gic-v3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index fd4fb1b3578710..d8ea330454f4a1 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -654,6 +654,9 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) if (static_branch_likely(&supports_deactivate_key)) gic_write_eoir(irqnr); + else + isb() + /* * Leave the PSR.I bit set to prevent other NMIs to be * received while handling this one. From d74b09b933b99259c77906d9f10154df5bc4750a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 13 May 2022 14:30:37 +0100 Subject: [PATCH 0412/1056] irqchip/gic-v3: Refactor ISB + EOIR at ack time commit 6efb50923771f392122f5ce69dfc43b08f16e449 upstream. There are cases where a context synchronization event is necessary between an IRQ being raised and being handled, and there are races such that we cannot rely upon the exception entry being subsequent to the interrupt being raised. To fix this, we place an ISB between a read of IAR and the subsequent invocation of an IRQ handler. When EOI mode 1 is in use, we need to EOI an interrupt prior to invoking its handler, and we have a write to EOIR for this. As this write to EOIR requires an ISB, and this is provided by the gic_write_eoir() helper, we omit the usual ISB in this case, with the logic being: | if (static_branch_likely(&supports_deactivate_key)) | gic_write_eoir(irqnr); | else | isb(); This is somewhat opaque, and it would be a little clearer if there were an unconditional ISB, with only the write to EOIR being conditional, e.g. | if (static_branch_likely(&supports_deactivate_key)) | write_gicreg(irqnr, ICC_EOIR1_EL1); | | isb(); This patch rewrites the code that way, with this logic factored into a new helper function with comments explaining what the ISB is for, as were originally laid out in commit: 39a06b67c2c1256b ("irqchip/gic: Ensure we have an ISB between ack and ->handle_irq") Note that since then, we removed the IAR polling in commit: 342677d70ab92142 ("irqchip/gic-v3: Remove acknowledge loop") ... which removed one of the two race conditions. For consistency, other portions of the driver are made to manipulate EOIR using write_gicreg() and explcit ISBs, and the gic_write_eoir() helper function is removed. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220513133038.226182-3-mark.rutland@arm.com Cc: Jon Hunter Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/arch_gicv3.h | 7 +---- arch/arm64/include/asm/arch_gicv3.h | 6 ---- drivers/irqchip/irq-gic-v3.c | 43 ++++++++++++++++++++++------- 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index 413abfb42989e9..f82a819eb0dbb4 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -48,6 +48,7 @@ static inline u32 read_ ## a64(void) \ return read_sysreg(a32); \ } \ +CPUIF_MAP(ICC_EOIR1, ICC_EOIR1_EL1) CPUIF_MAP(ICC_PMR, ICC_PMR_EL1) CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1) CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1) @@ -63,12 +64,6 @@ CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1) /* Low-level accessors */ -static inline void gic_write_eoir(u32 irq) -{ - write_sysreg(irq, ICC_EOIR1); - isb(); -} - static inline void gic_write_dir(u32 val) { write_sysreg(val, ICC_DIR); diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 4ad22c3135dbb9..5a0f792492af00 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -26,12 +26,6 @@ * sets the GP register's most significant bits to 0 with an explicit cast. */ -static inline void gic_write_eoir(u32 irq) -{ - write_sysreg_s(irq, SYS_ICC_EOIR1_EL1); - isb(); -} - static __always_inline void gic_write_dir(u32 irq) { write_sysreg_s(irq, SYS_ICC_DIR_EL1); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index d8ea330454f4a1..9507989bf2e1e8 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -556,7 +556,8 @@ static void gic_irq_nmi_teardown(struct irq_data *d) static void gic_eoi_irq(struct irq_data *d) { - gic_write_eoir(gic_irq(d)); + write_gicreg(gic_irq(d), ICC_EOIR1_EL1); + isb(); } static void gic_eoimode1_eoi_irq(struct irq_data *d) @@ -640,10 +641,38 @@ static void gic_deactivate_unhandled(u32 irqnr) if (irqnr < 8192) gic_write_dir(irqnr); } else { - gic_write_eoir(irqnr); + write_gicreg(irqnr, ICC_EOIR1_EL1); + isb(); } } +/* + * Follow a read of the IAR with any HW maintenance that needs to happen prior + * to invoking the relevant IRQ handler. We must do two things: + * + * (1) Ensure instruction ordering between a read of IAR and subsequent + * instructions in the IRQ handler using an ISB. + * + * It is possible for the IAR to report an IRQ which was signalled *after* + * the CPU took an IRQ exception as multiple interrupts can race to be + * recognized by the GIC, earlier interrupts could be withdrawn, and/or + * later interrupts could be prioritized by the GIC. + * + * For devices which are tightly coupled to the CPU, such as PMUs, a + * context synchronization event is necessary to ensure that system + * register state is not stale, as these may have been indirectly written + * *after* exception entry. + * + * (2) Deactivate the interrupt when EOI mode 1 is in use. + */ +static inline void gic_complete_ack(u32 irqnr) +{ + if (static_branch_likely(&supports_deactivate_key)) + write_gicreg(irqnr, ICC_EOIR1_EL1); + + isb(); +} + static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) { bool irqs_enabled = interrupts_enabled(regs); @@ -652,10 +681,7 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) if (irqs_enabled) nmi_enter(); - if (static_branch_likely(&supports_deactivate_key)) - gic_write_eoir(irqnr); - else - isb() + gic_complete_ack(irqnr); /* * Leave the PSR.I bit set to prevent other NMIs to be @@ -726,10 +752,7 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs gic_arch_enable_irqs(); } - if (static_branch_likely(&supports_deactivate_key)) - gic_write_eoir(irqnr); - else - isb(); + gic_complete_ack(irqnr); if (handle_domain_irq(gic_data.domain, irqnr, regs)) { WARN_ONCE(true, "Unexpected interrupt received!\n"); From 8371666ef44cb5a343307cc741f4b86d5be6dad7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 21 May 2022 08:45:28 +0100 Subject: [PATCH 0413/1056] rxrpc: Fix locking issue [ Upstream commit ad25f5cb39872ca14bcbe00816ae65c22fe04b89 ] There's a locking issue with the per-netns list of calls in rxrpc. The pieces of code that add and remove a call from the list use write_lock() and the calls procfile uses read_lock() to access it. However, the timer callback function may trigger a removal by trying to queue a call for processing and finding that it's already queued - at which point it has a spare refcount that it has to do something with. Unfortunately, if it puts the call and this reduces the refcount to 0, the call will be removed from the list. Unfortunately, since the _bh variants of the locking functions aren't used, this can deadlock. ================================ WARNING: inconsistent lock state 5.18.0-rc3-build4+ #10 Not tainted -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. ksoftirqd/2/25 [HC0[0]:SC1[1]:HE1:SE0] takes: ffff888107ac4038 (&rxnet->call_lock){+.?.}-{2:2}, at: rxrpc_put_call+0x103/0x14b {SOFTIRQ-ON-W} state was registered at: ... Possible unsafe locking scenario: CPU0 ---- lock(&rxnet->call_lock); lock(&rxnet->call_lock); *** DEADLOCK *** 1 lock held by ksoftirqd/2/25: #0: ffff8881008ffdb0 ((&call->timer)){+.-.}-{0:0}, at: call_timer_fn+0x5/0x23d Changes ======= ver #2) - Changed to using list_next_rcu() rather than rcu_dereference() directly. Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- fs/seq_file.c | 32 ++++++++++++++++++++++++++++++++ include/linux/list.h | 10 ++++++++++ include/linux/seq_file.h | 4 ++++ net/rxrpc/ar-internal.h | 2 +- net/rxrpc/call_accept.c | 6 +++--- net/rxrpc/call_object.c | 18 +++++++++--------- net/rxrpc/net_ns.c | 2 +- net/rxrpc/proc.c | 10 ++-------- 8 files changed, 62 insertions(+), 22 deletions(-) diff --git a/fs/seq_file.c b/fs/seq_file.c index 4a2cda04d3e293..b17ee4c4f618a4 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -947,6 +947,38 @@ struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos) } EXPORT_SYMBOL(seq_list_next); +struct list_head *seq_list_start_rcu(struct list_head *head, loff_t pos) +{ + struct list_head *lh; + + list_for_each_rcu(lh, head) + if (pos-- == 0) + return lh; + + return NULL; +} +EXPORT_SYMBOL(seq_list_start_rcu); + +struct list_head *seq_list_start_head_rcu(struct list_head *head, loff_t pos) +{ + if (!pos) + return head; + + return seq_list_start_rcu(head, pos - 1); +} +EXPORT_SYMBOL(seq_list_start_head_rcu); + +struct list_head *seq_list_next_rcu(void *v, struct list_head *head, + loff_t *ppos) +{ + struct list_head *lh; + + lh = list_next_rcu((struct list_head *)v); + ++*ppos; + return lh == head ? NULL : lh; +} +EXPORT_SYMBOL(seq_list_next_rcu); + /** * seq_hlist_start - start an iteration of a hlist * @head: the head of the hlist diff --git a/include/linux/list.h b/include/linux/list.h index a119dd1990d4ef..d206ae93c06daa 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -577,6 +577,16 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each(pos, head) \ for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) +/** + * list_for_each_rcu - Iterate over a list in an RCU-safe fashion + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each_rcu(pos, head) \ + for (pos = rcu_dereference((head)->next); \ + !list_is_head(pos, (head)); \ + pos = rcu_dereference(pos->next)) + /** * list_for_each_continue - continue iteration over a list * @pos: the &struct list_head to use as a loop cursor. diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 5733890df64f57..0b429111f85e4b 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -261,6 +261,10 @@ extern struct list_head *seq_list_start_head(struct list_head *head, extern struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos); +extern struct list_head *seq_list_start_rcu(struct list_head *head, loff_t pos); +extern struct list_head *seq_list_start_head_rcu(struct list_head *head, loff_t pos); +extern struct list_head *seq_list_next_rcu(void *v, struct list_head *head, loff_t *ppos); + /* * Helpers for iteration over hlist_head-s in seq_files */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index dce056adb78cf4..f2d593e27b64f9 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -68,7 +68,7 @@ struct rxrpc_net { struct proc_dir_entry *proc_net; /* Subdir in /proc/net */ u32 epoch; /* Local epoch for detecting local-end reset */ struct list_head calls; /* List of calls active in this namespace */ - rwlock_t call_lock; /* Lock for ->calls */ + spinlock_t call_lock; /* Lock for ->calls */ atomic_t nr_calls; /* Count of allocated calls */ atomic_t nr_conns; diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 1ae90fb979362b..8b24ffbc72efb3 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -140,9 +140,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); rxnet = call->rxnet; - write_lock(&rxnet->call_lock); - list_add_tail(&call->link, &rxnet->calls); - write_unlock(&rxnet->call_lock); + spin_lock_bh(&rxnet->call_lock); + list_add_tail_rcu(&call->link, &rxnet->calls); + spin_unlock_bh(&rxnet->call_lock); b->call_backlog[call_head] = call; smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 043508fd8d8a5d..25c9a2cbf048c4 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -337,9 +337,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); rxnet = call->rxnet; - write_lock(&rxnet->call_lock); - list_add_tail(&call->link, &rxnet->calls); - write_unlock(&rxnet->call_lock); + spin_lock_bh(&rxnet->call_lock); + list_add_tail_rcu(&call->link, &rxnet->calls); + spin_unlock_bh(&rxnet->call_lock); /* From this point on, the call is protected by its own lock. */ release_sock(&rx->sk); @@ -631,9 +631,9 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); if (!list_empty(&call->link)) { - write_lock(&rxnet->call_lock); + spin_lock_bh(&rxnet->call_lock); list_del_init(&call->link); - write_unlock(&rxnet->call_lock); + spin_unlock_bh(&rxnet->call_lock); } rxrpc_cleanup_call(call); @@ -705,7 +705,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) _enter(""); if (!list_empty(&rxnet->calls)) { - write_lock(&rxnet->call_lock); + spin_lock_bh(&rxnet->call_lock); while (!list_empty(&rxnet->calls)) { call = list_entry(rxnet->calls.next, @@ -720,12 +720,12 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) rxrpc_call_states[call->state], call->flags, call->events); - write_unlock(&rxnet->call_lock); + spin_unlock_bh(&rxnet->call_lock); cond_resched(); - write_lock(&rxnet->call_lock); + spin_lock_bh(&rxnet->call_lock); } - write_unlock(&rxnet->call_lock); + spin_unlock_bh(&rxnet->call_lock); } atomic_dec(&rxnet->nr_calls); diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index cc7e30733feb0d..e4d6d432515bc5 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -50,7 +50,7 @@ static __net_init int rxrpc_init_net(struct net *net) rxnet->epoch |= RXRPC_RANDOM_EPOCH; INIT_LIST_HEAD(&rxnet->calls); - rwlock_init(&rxnet->call_lock); + spin_lock_init(&rxnet->call_lock); atomic_set(&rxnet->nr_calls, 1); atomic_set(&rxnet->nr_conns, 1); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index e2f990754f8820..5a67955cc00f65 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -26,29 +26,23 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { */ static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos) __acquires(rcu) - __acquires(rxnet->call_lock) { struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); rcu_read_lock(); - read_lock(&rxnet->call_lock); - return seq_list_start_head(&rxnet->calls, *_pos); + return seq_list_start_head_rcu(&rxnet->calls, *_pos); } static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); - return seq_list_next(v, &rxnet->calls, pos); + return seq_list_next_rcu(v, &rxnet->calls, pos); } static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) - __releases(rxnet->call_lock) __releases(rcu) { - struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); - - read_unlock(&rxnet->call_lock); rcu_read_unlock(); } From 93dfb9c6deebd4343d4396d24335f8a712d84327 Mon Sep 17 00:00:00 2001 From: Vladimir Lypak Date: Wed, 25 Aug 2021 22:29:42 +0530 Subject: [PATCH 0414/1056] dt-bindings: soc: qcom: smd-rpm: Add compatible for MSM8953 SoC [ Upstream commit 96c42812f798c5e48d55cd6fc2101ce99af19608 ] Document compatible for MSM8953 SoC. Signed-off-by: Vladimir Lypak Signed-off-by: Adam Skladowski Signed-off-by: Sireesh Kodali Acked-by: Rob Herring Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210825165943.19415-1-sireeshkodali1@gmail.com Signed-off-by: Sasha Levin --- Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml index cc3fe5ed7421ef..77963b86b714cf 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml @@ -34,6 +34,7 @@ properties: - qcom,rpm-ipq6018 - qcom,rpm-msm8226 - qcom,rpm-msm8916 + - qcom,rpm-msm8953 - qcom,rpm-msm8974 - qcom,rpm-msm8976 - qcom,rpm-msm8996 @@ -57,6 +58,7 @@ if: - qcom,rpm-apq8084 - qcom,rpm-msm8916 - qcom,rpm-msm8974 + - qcom,rpm-msm8953 then: required: - qcom,smd-channels From 82b50219c85de217caccb51b4e1d032030d735ad Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Tue, 19 Apr 2022 00:18:57 +0100 Subject: [PATCH 0415/1056] dt-bindings: soc: qcom: smd-rpm: Fix missing MSM8936 compatible [ Upstream commit e930244918092d44b60a7b538cf60d737010ceef ] Add compatible msm8936. msm8936 covers both msm8936 and msm8939. The relevant driver already has the compat string but, we haven't documented it. Fixes: d6e52482f5ab ("drivers: soc: Add MSM8936 SMD RPM compatible") Signed-off-by: Bryan O'Donoghue Acked-by: Krzysztof Kozlowski Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220418231857.3061053-1-bryan.odonoghue@linaro.org Signed-off-by: Sasha Levin --- Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml index 77963b86b714cf..1b0062e3c1a4b1 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml @@ -34,6 +34,7 @@ properties: - qcom,rpm-ipq6018 - qcom,rpm-msm8226 - qcom,rpm-msm8916 + - qcom,rpm-msm8936 - qcom,rpm-msm8953 - qcom,rpm-msm8974 - qcom,rpm-msm8976 From e9f331bb5d433c9be52c4e3d9d4e2e41ef4d8493 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 15 Oct 2021 14:57:40 -0600 Subject: [PATCH 0416/1056] module: change to print useful messages from elf_validity_check() [ Upstream commit 7fd982f394c42f25a73fe9dfbf1e6b11fa26b40a ] elf_validity_check() checks ELF headers for errors and ELF Spec. compliance and if any of them fail it returns -ENOEXEC from all of these error paths. Almost all of them don't print any messages. When elf_validity_check() returns an error, load_module() prints an error message without error code. It is hard to determine why the module ELF structure is invalid, even if load_module() prints the error code which is -ENOEXEC in all of these cases. Change to print useful error messages from elf_validity_check() to clearly say what went wrong and why the ELF validity checks failed. Remove the load_module() error message which is no longer needed. This patch includes changes to fix build warns on 32-bit platforms: warning: format '%llu' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Off' {aka 'unsigned int'} Reported-by: kernel test robot Signed-off-by: Shuah Khan Signed-off-by: Luis Chamberlain Signed-off-by: Sasha Levin --- kernel/module.c | 75 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index 83991c2d5af9ea..256b3c80a771b4 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2967,14 +2967,29 @@ static int elf_validity_check(struct load_info *info) Elf_Shdr *shdr, *strhdr; int err; - if (info->len < sizeof(*(info->hdr))) - return -ENOEXEC; + if (info->len < sizeof(*(info->hdr))) { + pr_err("Invalid ELF header len %lu\n", info->len); + goto no_exec; + } - if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0 - || info->hdr->e_type != ET_REL - || !elf_check_arch(info->hdr) - || info->hdr->e_shentsize != sizeof(Elf_Shdr)) - return -ENOEXEC; + if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0) { + pr_err("Invalid ELF header magic: != %s\n", ELFMAG); + goto no_exec; + } + if (info->hdr->e_type != ET_REL) { + pr_err("Invalid ELF header type: %u != %u\n", + info->hdr->e_type, ET_REL); + goto no_exec; + } + if (!elf_check_arch(info->hdr)) { + pr_err("Invalid architecture in ELF header: %u\n", + info->hdr->e_machine); + goto no_exec; + } + if (info->hdr->e_shentsize != sizeof(Elf_Shdr)) { + pr_err("Invalid ELF section header size\n"); + goto no_exec; + } /* * e_shnum is 16 bits, and sizeof(Elf_Shdr) is @@ -2983,8 +2998,10 @@ static int elf_validity_check(struct load_info *info) */ if (info->hdr->e_shoff >= info->len || (info->hdr->e_shnum * sizeof(Elf_Shdr) > - info->len - info->hdr->e_shoff)) - return -ENOEXEC; + info->len - info->hdr->e_shoff)) { + pr_err("Invalid ELF section header overflow\n"); + goto no_exec; + } info->sechdrs = (void *)info->hdr + info->hdr->e_shoff; @@ -2992,13 +3009,19 @@ static int elf_validity_check(struct load_info *info) * Verify if the section name table index is valid. */ if (info->hdr->e_shstrndx == SHN_UNDEF - || info->hdr->e_shstrndx >= info->hdr->e_shnum) - return -ENOEXEC; + || info->hdr->e_shstrndx >= info->hdr->e_shnum) { + pr_err("Invalid ELF section name index: %d || e_shstrndx (%d) >= e_shnum (%d)\n", + info->hdr->e_shstrndx, info->hdr->e_shstrndx, + info->hdr->e_shnum); + goto no_exec; + } strhdr = &info->sechdrs[info->hdr->e_shstrndx]; err = validate_section_offset(info, strhdr); - if (err < 0) + if (err < 0) { + pr_err("Invalid ELF section hdr(type %u)\n", strhdr->sh_type); return err; + } /* * The section name table must be NUL-terminated, as required @@ -3006,8 +3029,10 @@ static int elf_validity_check(struct load_info *info) * strings in the section safe. */ info->secstrings = (void *)info->hdr + strhdr->sh_offset; - if (info->secstrings[strhdr->sh_size - 1] != '\0') - return -ENOEXEC; + if (info->secstrings[strhdr->sh_size - 1] != '\0') { + pr_err("ELF Spec violation: section name table isn't null terminated\n"); + goto no_exec; + } /* * The code assumes that section 0 has a length of zero and @@ -3015,8 +3040,11 @@ static int elf_validity_check(struct load_info *info) */ if (info->sechdrs[0].sh_type != SHT_NULL || info->sechdrs[0].sh_size != 0 - || info->sechdrs[0].sh_addr != 0) - return -ENOEXEC; + || info->sechdrs[0].sh_addr != 0) { + pr_err("ELF Spec violation: section 0 type(%d)!=SH_NULL or non-zero len or addr\n", + info->sechdrs[0].sh_type); + goto no_exec; + } for (i = 1; i < info->hdr->e_shnum; i++) { shdr = &info->sechdrs[i]; @@ -3026,8 +3054,12 @@ static int elf_validity_check(struct load_info *info) continue; case SHT_SYMTAB: if (shdr->sh_link == SHN_UNDEF - || shdr->sh_link >= info->hdr->e_shnum) - return -ENOEXEC; + || shdr->sh_link >= info->hdr->e_shnum) { + pr_err("Invalid ELF sh_link!=SHN_UNDEF(%d) or (sh_link(%d) >= hdr->e_shnum(%d)\n", + shdr->sh_link, shdr->sh_link, + info->hdr->e_shnum); + goto no_exec; + } fallthrough; default: err = validate_section_offset(info, shdr); @@ -3049,6 +3081,9 @@ static int elf_validity_check(struct load_info *info) } return 0; + +no_exec: + return -ENOEXEC; } #define COPY_CHUNK_SIZE (16*PAGE_SIZE) @@ -3925,10 +3960,8 @@ static int load_module(struct load_info *info, const char __user *uargs, * sections. */ err = elf_validity_check(info); - if (err) { - pr_err("Module has invalid ELF structures\n"); + if (err) goto free_copy; - } /* * Everything checks out, so set up the section info From 09cb6663618a74fe5572a4931ecbf098832e79ec Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 4 May 2022 12:54:20 +0300 Subject: [PATCH 0417/1056] module: fix [e_shstrndx].sh_size=0 OOB access [ Upstream commit 391e982bfa632b8315235d8be9c0a81374c6a19c ] It is trivial to craft a module to trigger OOB access in this line: if (info->secstrings[strhdr->sh_size - 1] != '\0') { BUG: unable to handle page fault for address: ffffc90000aa0fff PGD 100000067 P4D 100000067 PUD 100066067 PMD 10436f067 PTE 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 7 PID: 1215 Comm: insmod Not tainted 5.18.0-rc5-00007-g9bf578647087-dirty #10 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-4.fc34 04/01/2014 RIP: 0010:load_module+0x19b/0x2391 Fixes: ec2a29593c83 ("module: harden ELF info handling") Signed-off-by: Alexey Dobriyan [rebased patch onto modules-next] Signed-off-by: Luis Chamberlain Signed-off-by: Sasha Levin --- kernel/module.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/module.c b/kernel/module.c index 256b3c80a771b4..ef79f4dbda8769 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3029,6 +3029,10 @@ static int elf_validity_check(struct load_info *info) * strings in the section safe. */ info->secstrings = (void *)info->hdr + strhdr->sh_offset; + if (strhdr->sh_size == 0) { + pr_err("empty section name table\n"); + goto no_exec; + } if (info->secstrings[strhdr->sh_size - 1] != '\0') { pr_err("ELF Spec violation: section name table isn't null terminated\n"); goto no_exec; From e5fde29135a451ec8b997d24e3514ed05dd466cf Mon Sep 17 00:00:00 2001 From: Yian Chen Date: Fri, 20 May 2022 17:21:15 -0700 Subject: [PATCH 0418/1056] iommu/vt-d: Fix PCI bus rescan device hot add commit 316f92a705a4c2bf4712135180d56f3cca09243a upstream. Notifier calling chain uses priority to determine the execution order of the notifiers or listeners registered to the chain. PCI bus device hot add utilizes the notification mechanism. The current code sets low priority (INT_MIN) to Intel dmar_pci_bus_notifier and postpones DMAR decoding after adding new device into IOMMU. The result is that struct device pointer cannot be found in DRHD search for the new device's DMAR/IOMMU. Subsequently, the device is put under the "catch-all" IOMMU instead of the correct one. This could cause system hang when device TLB invalidation is sent to the wrong IOMMU. Invalidation timeout error and hard lockup have been observed and data inconsistency/crush may occur as well. This patch fixes the issue by setting a positive priority(1) for dmar_pci_bus_notifier while the priority of IOMMU bus notifier uses the default value(0), therefore DMAR decoding will be in advance of DRHD search for a new device to find the correct IOMMU. Following is a 2-step example that triggers the bug by simulating PCI device hot add behavior in Intel Sapphire Rapids server. echo 1 > /sys/bus/pci/devices/0000:6a:01.0/remove echo 1 > /sys/bus/pci/rescan Fixes: 59ce0515cdaf ("iommu/vt-d: Update DRHD/RMRR/ATSR device scope") Cc: stable@vger.kernel.org # v3.15+ Reported-by: Zhang, Bernice Signed-off-by: Jacob Pan Signed-off-by: Yian Chen Link: https://lore.kernel.org/r/20220521002115.1624069-1-yian.chen@intel.com Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index b7708b93f3fa18..3d9cb711e87b80 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -385,7 +385,7 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, static struct notifier_block dmar_pci_bus_nb = { .notifier_call = dmar_pci_bus_notifier, - .priority = INT_MIN, + .priority = 1, }; static struct dmar_drhd_unit * From 989b2c40322e1d12df34cee6425d56c5d7fea4bf Mon Sep 17 00:00:00 2001 From: Guiling Deng Date: Tue, 28 Jun 2022 09:36:41 -0700 Subject: [PATCH 0419/1056] fbdev: fbmem: Fix logo center image dx issue commit 955f04766d4e6eb94bf3baa539e096808c74ebfb upstream. Image.dx gets wrong value because of missing '()'. If xres == logo->width and n == 1, image.dx = -16. Signed-off-by: Guiling Deng Fixes: 3d8b1933eb1c ("fbdev: fbmem: add config option to center the bootup logo") Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fbmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 0371ad233fdf2e..3c697ec507ba8a 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -514,7 +514,7 @@ static int fb_show_logo_line(struct fb_info *info, int rotate, while (n && (n * (logo->width + 8) - 8 > xres)) --n; - image.dx = (xres - n * (logo->width + 8) - 8) / 2; + image.dx = (xres - (n * (logo->width + 8) - 8)) / 2; image.dy = y ?: (yres - logo->height) / 2; } else { image.dx = 0; From 738d06ef99cb3143513debec193959de50483b78 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 29 Jun 2022 15:53:55 +0200 Subject: [PATCH 0420/1056] fbmem: Check virtual screen sizes in fb_set_var() commit 6c11df58fd1ac0aefcb3b227f72769272b939e56 upstream. Verify that the fbdev or drm driver correctly adjusted the virtual screen sizes. On failure report the failing driver and reject the screen size change. Signed-off-by: Helge Deller Reviewed-by: Geert Uytterhoeven Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fbmem.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 3c697ec507ba8a..0fe51d0d7b25ab 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1020,6 +1020,16 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) if (ret) return ret; + /* verify that virtual resolution >= physical resolution */ + if (var->xres_virtual < var->xres || + var->yres_virtual < var->yres) { + pr_warn("WARNING: fbcon: Driver '%s' missed to adjust virtual screen size (%ux%u vs. %ux%u)\n", + info->fix.id, + var->xres_virtual, var->yres_virtual, + var->xres, var->yres); + return -EINVAL; + } + if ((var->activate & FB_ACTIVATE_MASK) != FB_ACTIVATE_NOW) return 0; From 6886327780254ba749b770373653b6afc2a339fc Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 25 Jun 2022 12:56:49 +0200 Subject: [PATCH 0421/1056] fbcon: Disallow setting font bigger than screen size commit 65a01e601dbba8b7a51a2677811f70f783766682 upstream. Prevent that users set a font size which is bigger than the physical screen. It's unlikely this may happen (because screens are usually much larger than the fonts and each font char is limited to 32x32 pixels), but it may happen on smaller screens/LCD displays. Signed-off-by: Helge Deller Reviewed-by: Daniel Vetter Reviewed-by: Geert Uytterhoeven Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fbcon.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index a25b63b56223f1..0710c5ec1014f9 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2480,6 +2480,11 @@ static int fbcon_set_font(struct vc_data *vc, struct console_font *font, if (charcount != 256 && charcount != 512) return -EINVAL; + /* font bigger than screen resolution ? */ + if (w > FBCON_SWAP(info->var.rotate, info->var.xres, info->var.yres) || + h > FBCON_SWAP(info->var.rotate, info->var.yres, info->var.xres)) + return -EINVAL; + /* Make sure drawing engine can handle the font */ if (!(info->pixmap.blit_x & (1 << (font->width - 1))) || !(info->pixmap.blit_y & (1 << (font->height - 1)))) From 9c9e44bb3dd5233232f2379c2dde0e403b1fd642 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 25 Jun 2022 13:00:34 +0200 Subject: [PATCH 0422/1056] fbcon: Prevent that screen size is smaller than font size commit e64242caef18b4a5840b0e7a9bff37abd4f4f933 upstream. We need to prevent that users configure a screen size which is smaller than the currently selected font size. Otherwise rendering chars on the screen will access memory outside the graphics memory region. This patch adds a new function fbcon_modechange_possible() which implements this check and which later may be extended with other checks if necessary. The new function is called from the FBIOPUT_VSCREENINFO ioctl handler in fbmem.c, which will return -EINVAL if userspace asked for a too small screen size. Signed-off-by: Helge Deller Reviewed-by: Geert Uytterhoeven Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fbcon.c | 28 ++++++++++++++++++++++++++++ drivers/video/fbdev/core/fbmem.c | 4 +++- include/linux/fbcon.h | 4 ++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 0710c5ec1014f9..bb83e7c53ae0ed 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2747,6 +2747,34 @@ void fbcon_update_vcs(struct fb_info *info, bool all) } EXPORT_SYMBOL(fbcon_update_vcs); +/* let fbcon check if it supports a new screen resolution */ +int fbcon_modechange_possible(struct fb_info *info, struct fb_var_screeninfo *var) +{ + struct fbcon_ops *ops = info->fbcon_par; + struct vc_data *vc; + unsigned int i; + + WARN_CONSOLE_UNLOCKED(); + + if (!ops) + return 0; + + /* prevent setting a screen size which is smaller than font size */ + for (i = first_fb_vc; i <= last_fb_vc; i++) { + vc = vc_cons[i].d; + if (!vc || vc->vc_mode != KD_TEXT || + registered_fb[con2fb_map[i]] != info) + continue; + + if (vc->vc_font.width > FBCON_SWAP(var->rotate, var->xres, var->yres) || + vc->vc_font.height > FBCON_SWAP(var->rotate, var->yres, var->xres)) + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(fbcon_modechange_possible); + int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode) { diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 0fe51d0d7b25ab..6d7868cc1fca05 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1120,7 +1120,9 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, return -EFAULT; console_lock(); lock_fb_info(info); - ret = fb_set_var(info, &var); + ret = fbcon_modechange_possible(info, &var); + if (!ret) + ret = fb_set_var(info, &var); if (!ret) fbcon_update_vcs(info, var.activate & FB_ACTIVATE_ALL); unlock_fb_info(info); diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index ff5596dd30f855..2382dec6d6ab8e 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -15,6 +15,8 @@ void fbcon_new_modelist(struct fb_info *info); void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps); void fbcon_fb_blanked(struct fb_info *info, int blank); +int fbcon_modechange_possible(struct fb_info *info, + struct fb_var_screeninfo *var); void fbcon_update_vcs(struct fb_info *info, bool all); void fbcon_remap_all(struct fb_info *info); int fbcon_set_con2fb_map_ioctl(void __user *argp); @@ -33,6 +35,8 @@ static inline void fbcon_new_modelist(struct fb_info *info) {} static inline void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps) {} static inline void fbcon_fb_blanked(struct fb_info *info, int blank) {} +static inline int fbcon_modechange_possible(struct fb_info *info, + struct fb_var_screeninfo *var) { return 0; } static inline void fbcon_update_vcs(struct fb_info *info, bool all) {} static inline void fbcon_remap_all(struct fb_info *info) {} static inline int fbcon_set_con2fb_map_ioctl(void __user *argp) { return 0; } From 79827e53b069276785ed32f34915b7c76055e42c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 27 Jun 2022 20:42:18 +0200 Subject: [PATCH 0423/1056] PM: runtime: Redefine pm_runtime_release_supplier() commit 07358194badf73e267289b40b761f5dc56928eab upstream. Instead of passing an extra bool argument to pm_runtime_release_supplier(), make its callers take care of triggering a runtime-suspend of the supplier device as needed. No expected functional impact. Suggested-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki Reviewed-by: Greg Kroah-Hartman Cc: 5.1+ # 5.1+ Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 3 ++- drivers/base/power/runtime.c | 20 +++++++++----------- include/linux/pm_runtime.h | 5 ++--- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 8e73a34e100555..10e027e9269262 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -485,7 +485,8 @@ static void device_link_release_fn(struct work_struct *work) /* Ensure that all references to the link object have been dropped. */ device_link_synchronize_removal(); - pm_runtime_release_supplier(link, true); + pm_runtime_release_supplier(link); + pm_request_idle(link->supplier); put_device(link->consumer); put_device(link->supplier); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 44ae3909e64bb5..3179c9265471b8 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -308,13 +308,10 @@ static int rpm_get_suppliers(struct device *dev) /** * pm_runtime_release_supplier - Drop references to device link's supplier. * @link: Target device link. - * @check_idle: Whether or not to check if the supplier device is idle. * - * Drop all runtime PM references associated with @link to its supplier device - * and if @check_idle is set, check if that device is idle (and so it can be - * suspended). + * Drop all runtime PM references associated with @link to its supplier device. */ -void pm_runtime_release_supplier(struct device_link *link, bool check_idle) +void pm_runtime_release_supplier(struct device_link *link) { struct device *supplier = link->supplier; @@ -327,9 +324,6 @@ void pm_runtime_release_supplier(struct device_link *link, bool check_idle) while (refcount_dec_not_one(&link->rpm_active) && atomic_read(&supplier->power.usage_count) > 0) pm_runtime_put_noidle(supplier); - - if (check_idle) - pm_request_idle(supplier); } static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend) @@ -337,8 +331,11 @@ static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend) struct device_link *link; list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, - device_links_read_lock_held()) - pm_runtime_release_supplier(link, try_to_suspend); + device_links_read_lock_held()) { + pm_runtime_release_supplier(link); + if (try_to_suspend) + pm_request_idle(link->supplier); + } } static void rpm_put_suppliers(struct device *dev) @@ -1791,7 +1788,8 @@ void pm_runtime_drop_link(struct device_link *link) return; pm_runtime_drop_link_count(link->consumer); - pm_runtime_release_supplier(link, true); + pm_runtime_release_supplier(link); + pm_request_idle(link->supplier); } static bool pm_runtime_need_not_resume(struct device *dev) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 016de5776b6db4..90eaff8b78fc93 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -58,7 +58,7 @@ extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); -extern void pm_runtime_release_supplier(struct device_link *link, bool check_idle); +extern void pm_runtime_release_supplier(struct device_link *link); extern int devm_pm_runtime_enable(struct device *dev); @@ -284,8 +284,7 @@ static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} static inline void pm_runtime_drop_link(struct device_link *link) {} -static inline void pm_runtime_release_supplier(struct device_link *link, - bool check_idle) {} +static inline void pm_runtime_release_supplier(struct device_link *link) {} #endif /* !CONFIG_PM */ From 6c9c8a7a9a54bc2e3507e9c0e9e38d62dfb86f50 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 23 Jun 2022 13:02:31 -0700 Subject: [PATCH 0424/1056] memregion: Fix memregion_free() fallback definition commit f50974eee5c4a5de1e4f1a3d873099f170df25f8 upstream. In the CONFIG_MEMREGION=n case, memregion_free() is meant to be a static inline. 0day reports: In file included from drivers/cxl/core/port.c:4: include/linux/memregion.h:19:6: warning: no previous prototype for function 'memregion_free' [-Wmissing-prototypes] Mark memregion_free() static. Fixes: 33dd70752cd7 ("lib: Uplevel the pmem "region" ida to a global allocator") Reported-by: kernel test robot Reviewed-by: Alison Schofield Link: https://lore.kernel.org/r/165601455171.4042645.3350844271068713515.stgit@dwillia2-xfh Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- include/linux/memregion.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/memregion.h b/include/linux/memregion.h index e11595256cac00..c04c4fd2e20919 100644 --- a/include/linux/memregion.h +++ b/include/linux/memregion.h @@ -16,7 +16,7 @@ static inline int memregion_alloc(gfp_t gfp) { return -ENOMEM; } -void memregion_free(int id) +static inline void memregion_free(int id) { } #endif From cc409f88e8f71235dda7d2eddae26e481503ea4d Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Fri, 1 Jul 2022 01:33:29 +0800 Subject: [PATCH 0425/1056] video: of_display_timing.h: include errno.h commit 3663a2fb325b8782524f3edb0ae32d6faa615109 upstream. If CONFIG_OF is not enabled, default of_get_display_timing() returns an errno, so include the header. Fixes: 422b67e0b31a ("videomode: provide dummy inline functions for !CONFIG_OF") Suggested-by: Stephen Boyd Signed-off-by: Hsin-Yi Wang Reviewed-by: Stephen Boyd Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- include/video/of_display_timing.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/video/of_display_timing.h b/include/video/of_display_timing.h index e1126a74882a5f..eff166fdd81b95 100644 --- a/include/video/of_display_timing.h +++ b/include/video/of_display_timing.h @@ -8,6 +8,8 @@ #ifndef __LINUX_OF_DISPLAY_TIMING_H #define __LINUX_OF_DISPLAY_TIMING_H +#include + struct device_node; struct display_timing; struct display_timings; From c79726aba6af70294663b274df09e920d8e7c87f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 30 Jun 2022 14:16:54 +0200 Subject: [PATCH 0426/1056] powerpc/powernv: delay rng platform device creation until later in boot commit 887502826549caa7e4215fd9e628f48f14c0825a upstream. The platform device for the rng must be created much later in boot. Otherwise it tries to connect to a parent that doesn't yet exist, resulting in this splat: [ 0.000478] kobject: '(null)' ((____ptrval____)): is not initialized, yet kobject_get() is being called. [ 0.002925] [c000000002a0fb30] [c00000000073b0bc] kobject_get+0x8c/0x100 (unreliable) [ 0.003071] [c000000002a0fba0] [c00000000087e464] device_add+0xf4/0xb00 [ 0.003194] [c000000002a0fc80] [c000000000a7f6e4] of_device_add+0x64/0x80 [ 0.003321] [c000000002a0fcb0] [c000000000a800d0] of_platform_device_create_pdata+0xd0/0x1b0 [ 0.003476] [c000000002a0fd00] [c00000000201fa44] pnv_get_random_long_early+0x240/0x2e4 [ 0.003623] [c000000002a0fe20] [c000000002060c38] random_init+0xc0/0x214 This patch fixes the issue by doing the platform device creation inside of machine_subsys_initcall. Fixes: f3eac426657d ("powerpc/powernv: wire up rng during setup_arch") Cc: stable@vger.kernel.org Reported-by: Sachin Sant Signed-off-by: Jason A. Donenfeld Tested-by: Sachin Sant [mpe: Change "of node" to "platform device" in change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220630121654.1939181-1-Jason@zx2c4.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/rng.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 2b5a1a41234cc6..236bd2ba51b983 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -176,12 +176,8 @@ static int __init pnv_get_random_long_early(unsigned long *v) NULL) != pnv_get_random_long_early) return 0; - for_each_compatible_node(dn, NULL, "ibm,power-rng") { - if (rng_create(dn)) - continue; - /* Create devices for hwrng driver */ - of_platform_device_create(dn, NULL, NULL); - } + for_each_compatible_node(dn, NULL, "ibm,power-rng") + rng_create(dn); if (!ppc_md.get_random_seed) return 0; @@ -205,10 +201,18 @@ void __init pnv_rng_init(void) static int __init pnv_rng_late_init(void) { + struct device_node *dn; unsigned long v; + /* In case it wasn't called during init for some other reason. */ if (ppc_md.get_random_seed == pnv_get_random_long_early) pnv_get_random_long_early(&v); + + if (ppc_md.get_random_seed == powernv_get_random_long) { + for_each_compatible_node(dn, NULL, "ibm,power-rng") + of_platform_device_create(dn, NULL, NULL); + } + return 0; } machine_subsys_initcall(powernv, pnv_rng_late_init); From 188c798f3c2554fa0d7147e9b97baf144b817019 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Tue, 21 Jun 2022 17:11:22 +0200 Subject: [PATCH 0427/1056] net: dsa: qca8k: reset cpu port on MTU change commit 386228c694bf1e7a7688e44412cb33500b0ac585 upstream. It was discovered that the Documentation lacks of a fundamental detail on how to correctly change the MAX_FRAME_SIZE of the switch. In fact if the MAX_FRAME_SIZE is changed while the cpu port is on, the switch panics and cease to send any packet. This cause the mgmt ethernet system to not receive any packet (the slow fallback still works) and makes the device not reachable. To recover from this a switch reset is required. To correctly handle this, turn off the cpu ports before changing the MAX_FRAME_SIZE and turn on again after the value is applied. Fixes: f58d2598cf70 ("net: dsa: qca8k: implement the port MTU callbacks") Cc: stable@vger.kernel.org Signed-off-by: Christian Marangi Link: https://lore.kernel.org/r/20220621151122.10220-1-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/qca8k.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index a984f06f6f04f3..67869c8cbeaa80 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1599,7 +1599,7 @@ static int qca8k_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu) { struct qca8k_priv *priv = ds->priv; - int i, mtu = 0; + int ret, i, mtu = 0; priv->port_mtu[port] = new_mtu; @@ -1607,8 +1607,27 @@ qca8k_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu) if (priv->port_mtu[i] > mtu) mtu = priv->port_mtu[i]; + /* To change the MAX_FRAME_SIZE the cpu ports must be off or + * the switch panics. + * Turn off both cpu ports before applying the new value to prevent + * this. + */ + if (priv->port_sts[0].enabled) + qca8k_port_set_status(priv, 0, 0); + + if (priv->port_sts[6].enabled) + qca8k_port_set_status(priv, 6, 0); + /* Include L2 header / FCS length */ - return qca8k_write(priv, QCA8K_MAX_FRAME_SIZE, mtu + ETH_HLEN + ETH_FCS_LEN); + ret = qca8k_write(priv, QCA8K_MAX_FRAME_SIZE, mtu + ETH_HLEN + ETH_FCS_LEN); + + if (priv->port_sts[0].enabled) + qca8k_port_set_status(priv, 0, 1); + + if (priv->port_sts[6].enabled) + qca8k_port_set_status(priv, 6, 1); + + return ret; } static int From baffaed7fab3fc2e047691a4b192c03b97b2728a Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Fri, 8 Jul 2022 20:48:44 +0200 Subject: [PATCH 0428/1056] can: kvaser_usb: replace run-time checks with struct kvaser_usb_driver_info commit 49f274c72357d2d74cba70b172cf369768909707 upstream. Unify and move compile-time known information into new struct kvaser_usb_driver_info, in favor of run-time checks. All Kvaser USBcanII supports listen-only mode and error counter reporting. Link: https://lore.kernel.org/all/20220603083820.800246-2-extja@kvaser.com Suggested-by: Marc Kleine-Budde Cc: stable@vger.kernel.org Signed-off-by: Jimmy Assarsson [mkl: move struct kvaser_usb_driver_info into kvaser_usb_core.c] Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb/kvaser_usb.h | 22 +- .../net/can/usb/kvaser_usb/kvaser_usb_core.c | 275 ++++++++++-------- .../net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 24 +- 3 files changed, 173 insertions(+), 148 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h index 390b6bde883c8f..2e358e20d3d961 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h @@ -35,9 +35,9 @@ #define KVASER_USB_RX_BUFFER_SIZE 3072 #define KVASER_USB_MAX_NET_DEVICES 5 -/* USB devices features */ -#define KVASER_USB_HAS_SILENT_MODE BIT(0) -#define KVASER_USB_HAS_TXRX_ERRORS BIT(1) +/* Kvaser USB device quirks */ +#define KVASER_USB_QUIRK_HAS_SILENT_MODE BIT(0) +#define KVASER_USB_QUIRK_HAS_TXRX_ERRORS BIT(1) /* Device capabilities */ #define KVASER_USB_CAP_BERR_CAP 0x01 @@ -65,12 +65,7 @@ struct kvaser_usb_dev_card_data_hydra { struct kvaser_usb_dev_card_data { u32 ctrlmode_supported; u32 capabilities; - union { - struct { - enum kvaser_usb_leaf_family family; - } leaf; - struct kvaser_usb_dev_card_data_hydra hydra; - }; + struct kvaser_usb_dev_card_data_hydra hydra; }; /* Context for an outstanding, not yet ACKed, transmission */ @@ -84,7 +79,7 @@ struct kvaser_usb { struct usb_device *udev; struct usb_interface *intf; struct kvaser_usb_net_priv *nets[KVASER_USB_MAX_NET_DEVICES]; - const struct kvaser_usb_dev_ops *ops; + const struct kvaser_usb_driver_info *driver_info; const struct kvaser_usb_dev_cfg *cfg; struct usb_endpoint_descriptor *bulk_in, *bulk_out; @@ -166,6 +161,12 @@ struct kvaser_usb_dev_ops { int *cmd_len, u16 transid); }; +struct kvaser_usb_driver_info { + u32 quirks; + enum kvaser_usb_leaf_family family; + const struct kvaser_usb_dev_ops *ops; +}; + struct kvaser_usb_dev_cfg { const struct can_clock clock; const unsigned int timestamp_freq; @@ -185,4 +186,5 @@ int kvaser_usb_send_cmd_async(struct kvaser_usb_net_priv *priv, void *cmd, int len); int kvaser_usb_can_rx_over_error(struct net_device *netdev); + #endif /* KVASER_USB_H */ diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index 0cc0fc866a2a97..45abf2093b6663 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -61,8 +61,6 @@ #define USB_USBCAN_R_V2_PRODUCT_ID 294 #define USB_LEAF_LIGHT_R_V2_PRODUCT_ID 295 #define USB_LEAF_LIGHT_HS_V2_OEM2_PRODUCT_ID 296 -#define USB_LEAF_PRODUCT_ID_END \ - USB_LEAF_LIGHT_HS_V2_OEM2_PRODUCT_ID /* Kvaser USBCan-II devices product ids */ #define USB_USBCAN_REVB_PRODUCT_ID 2 @@ -89,116 +87,144 @@ #define USB_USBCAN_PRO_4HS_PRODUCT_ID 276 #define USB_HYBRID_CANLIN_PRODUCT_ID 277 #define USB_HYBRID_PRO_CANLIN_PRODUCT_ID 278 -#define USB_HYDRA_PRODUCT_ID_END \ - USB_HYBRID_PRO_CANLIN_PRODUCT_ID -static inline bool kvaser_is_leaf(const struct usb_device_id *id) -{ - return (id->idProduct >= USB_LEAF_DEVEL_PRODUCT_ID && - id->idProduct <= USB_CAN_R_PRODUCT_ID) || - (id->idProduct >= USB_LEAF_LITE_V2_PRODUCT_ID && - id->idProduct <= USB_LEAF_PRODUCT_ID_END); -} +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_hydra = { + .quirks = 0, + .ops = &kvaser_usb_hydra_dev_ops, +}; -static inline bool kvaser_is_usbcan(const struct usb_device_id *id) -{ - return id->idProduct >= USB_USBCAN_REVB_PRODUCT_ID && - id->idProduct <= USB_MEMORATOR_PRODUCT_ID; -} +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_usbcan = { + .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS | + KVASER_USB_QUIRK_HAS_SILENT_MODE, + .family = KVASER_USBCAN, + .ops = &kvaser_usb_leaf_dev_ops, +}; -static inline bool kvaser_is_hydra(const struct usb_device_id *id) -{ - return id->idProduct >= USB_BLACKBIRD_V2_PRODUCT_ID && - id->idProduct <= USB_HYDRA_PRODUCT_ID_END; -} +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf = { + .quirks = 0, + .family = KVASER_LEAF, + .ops = &kvaser_usb_leaf_dev_ops, +}; + +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err = { + .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS, + .family = KVASER_LEAF, + .ops = &kvaser_usb_leaf_dev_ops, +}; + +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err_listen = { + .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS | + KVASER_USB_QUIRK_HAS_SILENT_MODE, + .family = KVASER_LEAF, + .ops = &kvaser_usb_leaf_dev_ops, +}; static const struct usb_device_id kvaser_usb_table[] = { /* Leaf USB product IDs */ - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID) }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LS_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_SWC_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LIN_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_LS_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_SWC_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_DEVEL_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSHS_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_UPRO_HSHS_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_GI_PRODUCT_ID) }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_GI_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_OBDII_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSLS_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_CH_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_SPRO_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_MERCURY_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_LEAF_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_CAN_R_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_R_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_R_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM2_PRODUCT_ID) }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_R_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_R_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, /* USBCANII USB product IDs */ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_REVB_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMORATOR_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, { USB_DEVICE(KVASER_VENDOR_ID, USB_VCI2_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, /* Minihydra USB product IDs */ - { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_5HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_5HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_4HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_HS_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_2HS_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_2HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_2HS_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_2CANLIN_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_2CANLIN_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_U100_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_U100P_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_U100S_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_4HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_CANLIN_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID) }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_5HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_5HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_4HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_HS_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_2HS_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_2HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_2HS_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_2CANLIN_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_2CANLIN_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_U100_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_U100P_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_U100S_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_4HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_CANLIN_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { } }; MODULE_DEVICE_TABLE(usb, kvaser_usb_table); @@ -289,6 +315,7 @@ int kvaser_usb_can_rx_over_error(struct net_device *netdev) static void kvaser_usb_read_bulk_callback(struct urb *urb) { struct kvaser_usb *dev = urb->context; + const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; int err; unsigned int i; @@ -305,8 +332,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) goto resubmit_urb; } - dev->ops->dev_read_bulk_callback(dev, urb->transfer_buffer, - urb->actual_length); + ops->dev_read_bulk_callback(dev, urb->transfer_buffer, + urb->actual_length); resubmit_urb: usb_fill_bulk_urb(urb, dev->udev, @@ -400,6 +427,7 @@ static int kvaser_usb_open(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; + const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; int err; err = open_candev(netdev); @@ -410,11 +438,11 @@ static int kvaser_usb_open(struct net_device *netdev) if (err) goto error; - err = dev->ops->dev_set_opt_mode(priv); + err = ops->dev_set_opt_mode(priv); if (err) goto error; - err = dev->ops->dev_start_chip(priv); + err = ops->dev_start_chip(priv); if (err) { netdev_warn(netdev, "Cannot start device, error %d\n", err); goto error; @@ -471,22 +499,23 @@ static int kvaser_usb_close(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; + const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; int err; netif_stop_queue(netdev); - err = dev->ops->dev_flush_queue(priv); + err = ops->dev_flush_queue(priv); if (err) netdev_warn(netdev, "Cannot flush queue, error %d\n", err); - if (dev->ops->dev_reset_chip) { - err = dev->ops->dev_reset_chip(dev, priv->channel); + if (ops->dev_reset_chip) { + err = ops->dev_reset_chip(dev, priv->channel); if (err) netdev_warn(netdev, "Cannot reset card, error %d\n", err); } - err = dev->ops->dev_stop_chip(priv); + err = ops->dev_stop_chip(priv); if (err) netdev_warn(netdev, "Cannot stop device, error %d\n", err); @@ -525,6 +554,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; + const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; struct net_device_stats *stats = &netdev->stats; struct kvaser_usb_tx_urb_context *context = NULL; struct urb *urb; @@ -567,8 +597,8 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, goto freeurb; } - buf = dev->ops->dev_frame_to_cmd(priv, skb, &context->dlc, &cmd_len, - context->echo_index); + buf = ops->dev_frame_to_cmd(priv, skb, &context->dlc, &cmd_len, + context->echo_index); if (!buf) { stats->tx_dropped++; dev_kfree_skb(skb); @@ -652,15 +682,16 @@ static void kvaser_usb_remove_interfaces(struct kvaser_usb *dev) } } -static int kvaser_usb_init_one(struct kvaser_usb *dev, - const struct usb_device_id *id, int channel) +static int kvaser_usb_init_one(struct kvaser_usb *dev, int channel) { struct net_device *netdev; struct kvaser_usb_net_priv *priv; + const struct kvaser_usb_driver_info *driver_info = dev->driver_info; + const struct kvaser_usb_dev_ops *ops = driver_info->ops; int err; - if (dev->ops->dev_reset_chip) { - err = dev->ops->dev_reset_chip(dev, channel); + if (ops->dev_reset_chip) { + err = ops->dev_reset_chip(dev, channel); if (err) return err; } @@ -689,20 +720,19 @@ static int kvaser_usb_init_one(struct kvaser_usb *dev, priv->can.state = CAN_STATE_STOPPED; priv->can.clock.freq = dev->cfg->clock.freq; priv->can.bittiming_const = dev->cfg->bittiming_const; - priv->can.do_set_bittiming = dev->ops->dev_set_bittiming; - priv->can.do_set_mode = dev->ops->dev_set_mode; - if ((id->driver_info & KVASER_USB_HAS_TXRX_ERRORS) || + priv->can.do_set_bittiming = ops->dev_set_bittiming; + priv->can.do_set_mode = ops->dev_set_mode; + if ((driver_info->quirks & KVASER_USB_QUIRK_HAS_TXRX_ERRORS) || (priv->dev->card_data.capabilities & KVASER_USB_CAP_BERR_CAP)) - priv->can.do_get_berr_counter = dev->ops->dev_get_berr_counter; - if (id->driver_info & KVASER_USB_HAS_SILENT_MODE) + priv->can.do_get_berr_counter = ops->dev_get_berr_counter; + if (driver_info->quirks & KVASER_USB_QUIRK_HAS_SILENT_MODE) priv->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY; priv->can.ctrlmode_supported |= dev->card_data.ctrlmode_supported; if (priv->can.ctrlmode_supported & CAN_CTRLMODE_FD) { priv->can.data_bittiming_const = dev->cfg->data_bittiming_const; - priv->can.do_set_data_bittiming = - dev->ops->dev_set_data_bittiming; + priv->can.do_set_data_bittiming = ops->dev_set_data_bittiming; } netdev->flags |= IFF_ECHO; @@ -733,29 +763,22 @@ static int kvaser_usb_probe(struct usb_interface *intf, struct kvaser_usb *dev; int err; int i; + const struct kvaser_usb_driver_info *driver_info; + const struct kvaser_usb_dev_ops *ops; + + driver_info = (const struct kvaser_usb_driver_info *)id->driver_info; + if (!driver_info) + return -ENODEV; dev = devm_kzalloc(&intf->dev, sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; - if (kvaser_is_leaf(id)) { - dev->card_data.leaf.family = KVASER_LEAF; - dev->ops = &kvaser_usb_leaf_dev_ops; - } else if (kvaser_is_usbcan(id)) { - dev->card_data.leaf.family = KVASER_USBCAN; - dev->ops = &kvaser_usb_leaf_dev_ops; - } else if (kvaser_is_hydra(id)) { - dev->ops = &kvaser_usb_hydra_dev_ops; - } else { - dev_err(&intf->dev, - "Product ID (%d) is not a supported Kvaser USB device\n", - id->idProduct); - return -ENODEV; - } - dev->intf = intf; + dev->driver_info = driver_info; + ops = driver_info->ops; - err = dev->ops->dev_setup_endpoints(dev); + err = ops->dev_setup_endpoints(dev); if (err) { dev_err(&intf->dev, "Cannot get usb endpoint(s)"); return err; @@ -769,22 +792,22 @@ static int kvaser_usb_probe(struct usb_interface *intf, dev->card_data.ctrlmode_supported = 0; dev->card_data.capabilities = 0; - err = dev->ops->dev_init_card(dev); + err = ops->dev_init_card(dev); if (err) { dev_err(&intf->dev, "Failed to initialize card, error %d\n", err); return err; } - err = dev->ops->dev_get_software_info(dev); + err = ops->dev_get_software_info(dev); if (err) { dev_err(&intf->dev, "Cannot get software info, error %d\n", err); return err; } - if (dev->ops->dev_get_software_details) { - err = dev->ops->dev_get_software_details(dev); + if (ops->dev_get_software_details) { + err = ops->dev_get_software_details(dev); if (err) { dev_err(&intf->dev, "Cannot get software details, error %d\n", err); @@ -802,14 +825,14 @@ static int kvaser_usb_probe(struct usb_interface *intf, dev_dbg(&intf->dev, "Max outstanding tx = %d URBs\n", dev->max_tx_urbs); - err = dev->ops->dev_get_card_info(dev); + err = ops->dev_get_card_info(dev); if (err) { dev_err(&intf->dev, "Cannot get card info, error %d\n", err); return err; } - if (dev->ops->dev_get_capabilities) { - err = dev->ops->dev_get_capabilities(dev); + if (ops->dev_get_capabilities) { + err = ops->dev_get_capabilities(dev); if (err) { dev_err(&intf->dev, "Cannot get capabilities, error %d\n", err); @@ -819,7 +842,7 @@ static int kvaser_usb_probe(struct usb_interface *intf, } for (i = 0; i < dev->nchannels; i++) { - err = kvaser_usb_init_one(dev, id, i); + err = kvaser_usb_init_one(dev, i); if (err) { kvaser_usb_remove_interfaces(dev); return err; diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index f7af1bf5ab46d9..dda65bb66c1bd5 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -405,7 +405,7 @@ kvaser_usb_leaf_frame_to_cmd(const struct kvaser_usb_net_priv *priv, sizeof(struct kvaser_cmd_tx_can); cmd->u.tx_can.channel = priv->channel; - switch (dev->card_data.leaf.family) { + switch (dev->driver_info->family) { case KVASER_LEAF: cmd_tx_can_flags = &cmd->u.tx_can.leaf.flags; break; @@ -551,7 +551,7 @@ static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev) if (err) return err; - switch (dev->card_data.leaf.family) { + switch (dev->driver_info->family) { case KVASER_LEAF: kvaser_usb_leaf_get_software_info_leaf(dev, &cmd.u.leaf.softinfo); break; @@ -598,7 +598,7 @@ static int kvaser_usb_leaf_get_card_info(struct kvaser_usb *dev) dev->nchannels = cmd.u.cardinfo.nchannels; if (dev->nchannels > KVASER_USB_MAX_NET_DEVICES || - (dev->card_data.leaf.family == KVASER_USBCAN && + (dev->driver_info->family == KVASER_USBCAN && dev->nchannels > MAX_USBCAN_NET_DEVICES)) return -EINVAL; @@ -734,7 +734,7 @@ kvaser_usb_leaf_rx_error_update_can_state(struct kvaser_usb_net_priv *priv, new_state < CAN_STATE_BUS_OFF) priv->can.can_stats.restarts++; - switch (dev->card_data.leaf.family) { + switch (dev->driver_info->family) { case KVASER_LEAF: if (es->leaf.error_factor) { priv->can.can_stats.bus_error++; @@ -813,7 +813,7 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev, } } - switch (dev->card_data.leaf.family) { + switch (dev->driver_info->family) { case KVASER_LEAF: if (es->leaf.error_factor) { cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT; @@ -1005,7 +1005,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, stats = &priv->netdev->stats; if ((cmd->u.rx_can_header.flag & MSG_FLAG_ERROR_FRAME) && - (dev->card_data.leaf.family == KVASER_LEAF && + (dev->driver_info->family == KVASER_LEAF && cmd->id == CMD_LEAF_LOG_MESSAGE)) { kvaser_usb_leaf_leaf_rx_error(dev, cmd); return; @@ -1021,7 +1021,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, return; } - switch (dev->card_data.leaf.family) { + switch (dev->driver_info->family) { case KVASER_LEAF: rx_data = cmd->u.leaf.rx_can.data; break; @@ -1036,7 +1036,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, return; } - if (dev->card_data.leaf.family == KVASER_LEAF && cmd->id == + if (dev->driver_info->family == KVASER_LEAF && cmd->id == CMD_LEAF_LOG_MESSAGE) { cf->can_id = le32_to_cpu(cmd->u.leaf.log_message.id); if (cf->can_id & KVASER_EXTENDED_FRAME) @@ -1133,14 +1133,14 @@ static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev, break; case CMD_LEAF_LOG_MESSAGE: - if (dev->card_data.leaf.family != KVASER_LEAF) + if (dev->driver_info->family != KVASER_LEAF) goto warn; kvaser_usb_leaf_rx_can_msg(dev, cmd); break; case CMD_CHIP_STATE_EVENT: case CMD_CAN_ERROR_EVENT: - if (dev->card_data.leaf.family == KVASER_LEAF) + if (dev->driver_info->family == KVASER_LEAF) kvaser_usb_leaf_leaf_rx_error(dev, cmd); else kvaser_usb_leaf_usbcan_rx_error(dev, cmd); @@ -1152,12 +1152,12 @@ static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev, /* Ignored commands */ case CMD_USBCAN_CLOCK_OVERFLOW_EVENT: - if (dev->card_data.leaf.family != KVASER_USBCAN) + if (dev->driver_info->family != KVASER_USBCAN) goto warn; break; case CMD_FLUSH_QUEUE_REPLY: - if (dev->card_data.leaf.family != KVASER_LEAF) + if (dev->driver_info->family != KVASER_LEAF) goto warn; break; From 420b99306b7e2d7ab2bea3ec37efad36f585c716 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Fri, 8 Jul 2022 20:48:45 +0200 Subject: [PATCH 0429/1056] can: kvaser_usb: kvaser_usb_leaf: fix CAN clock frequency regression commit e6c80e601053ffdac5709f11ff3ec1e19ed05f7b upstream. The firmware of M32C based Leaf devices expects bittiming parameters calculated for 16MHz clock. Since we use the actual clock frequency of the device, the device may end up with wrong bittiming parameters, depending on user requested parameters. This regression affects M32C based Leaf devices with non-16MHz clock. Fixes: a8b513b824e4 ("can: kvaser_usb: get CAN clock frequency from device") Link: https://lore.kernel.org/all/20220603083820.800246-3-extja@kvaser.com Cc: stable@vger.kernel.org Signed-off-by: Jimmy Assarsson Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb/kvaser_usb.h | 1 + .../net/can/usb/kvaser_usb/kvaser_usb_core.c | 33 ++++++++++++------- .../net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 25 +++++++++----- 3 files changed, 38 insertions(+), 21 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h index 2e358e20d3d961..478e2eeec13603 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h @@ -38,6 +38,7 @@ /* Kvaser USB device quirks */ #define KVASER_USB_QUIRK_HAS_SILENT_MODE BIT(0) #define KVASER_USB_QUIRK_HAS_TXRX_ERRORS BIT(1) +#define KVASER_USB_QUIRK_IGNORE_CLK_FREQ BIT(2) /* Device capabilities */ #define KVASER_USB_CAP_BERR_CAP 0x01 diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index 45abf2093b6663..e570f5a76bbfd0 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -101,26 +101,33 @@ static const struct kvaser_usb_driver_info kvaser_usb_driver_info_usbcan = { }; static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf = { - .quirks = 0, + .quirks = KVASER_USB_QUIRK_IGNORE_CLK_FREQ, .family = KVASER_LEAF, .ops = &kvaser_usb_leaf_dev_ops, }; static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err = { - .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS, + .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS | + KVASER_USB_QUIRK_IGNORE_CLK_FREQ, .family = KVASER_LEAF, .ops = &kvaser_usb_leaf_dev_ops, }; static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err_listen = { .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS | - KVASER_USB_QUIRK_HAS_SILENT_MODE, + KVASER_USB_QUIRK_HAS_SILENT_MODE | + KVASER_USB_QUIRK_IGNORE_CLK_FREQ, .family = KVASER_LEAF, .ops = &kvaser_usb_leaf_dev_ops, }; +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leafimx = { + .quirks = 0, + .ops = &kvaser_usb_leaf_dev_ops, +}; + static const struct usb_device_id kvaser_usb_table[] = { - /* Leaf USB product IDs */ + /* Leaf M32C USB product IDs */ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID), @@ -161,22 +168,24 @@ static const struct usb_device_id kvaser_usb_table[] = { .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_CAN_R_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, + + /* Leaf i.MX28 USB product IDs */ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID), - .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID), - .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID), - .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID), - .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID), - .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_R_V2_PRODUCT_ID), - .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_R_V2_PRODUCT_ID), - .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM2_PRODUCT_ID), - .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, /* USBCANII USB product IDs */ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID), diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index dda65bb66c1bd5..d94101c4650804 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -525,16 +525,23 @@ static void kvaser_usb_leaf_get_software_info_leaf(struct kvaser_usb *dev, dev->fw_version = le32_to_cpu(softinfo->fw_version); dev->max_tx_urbs = le16_to_cpu(softinfo->max_outstanding_tx); - switch (sw_options & KVASER_USB_LEAF_SWOPTION_FREQ_MASK) { - case KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK: + if (dev->driver_info->quirks & KVASER_USB_QUIRK_IGNORE_CLK_FREQ) { + /* Firmware expects bittiming parameters calculated for 16MHz + * clock, regardless of the actual clock + */ dev->cfg = &kvaser_usb_leaf_dev_cfg_16mhz; - break; - case KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK: - dev->cfg = &kvaser_usb_leaf_dev_cfg_24mhz; - break; - case KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK: - dev->cfg = &kvaser_usb_leaf_dev_cfg_32mhz; - break; + } else { + switch (sw_options & KVASER_USB_LEAF_SWOPTION_FREQ_MASK) { + case KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_dev_cfg_16mhz; + break; + case KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_dev_cfg_24mhz; + break; + case KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_dev_cfg_32mhz; + break; + } } } From 63a3d237771591de087521a55f2e69845cf71d82 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Fri, 8 Jul 2022 20:48:46 +0200 Subject: [PATCH 0430/1056] can: kvaser_usb: kvaser_usb_leaf: fix bittiming limits commit b3b6df2c56d80b8c6740433cff5f016668b8de70 upstream. Use correct bittiming limits depending on device. For devices based on USBcanII, Leaf M32C or Leaf i.MX28. Fixes: 080f40a6fa28 ("can: kvaser_usb: Add support for Kvaser CAN/USB devices") Fixes: b4f20130af23 ("can: kvaser_usb: add support for Kvaser Leaf v2 and usb mini PCIe") Fixes: f5d4abea3ce0 ("can: kvaser_usb: Add support for the USBcan-II family") Link: https://lore.kernel.org/all/20220603083820.800246-4-extja@kvaser.com Cc: stable@vger.kernel.org Signed-off-by: Jimmy Assarsson [mkl: remove stray netlink.h include] [mkl: keep struct can_bittiming_const kvaser_usb_flexc_bittiming_const in kvaser_usb_hydra.c] Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb/kvaser_usb.h | 2 + .../net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 4 +- .../net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 76 +++++++++++-------- 3 files changed, 47 insertions(+), 35 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h index 478e2eeec13603..61e67986b625e9 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h @@ -188,4 +188,6 @@ int kvaser_usb_send_cmd_async(struct kvaser_usb_net_priv *priv, void *cmd, int kvaser_usb_can_rx_over_error(struct net_device *netdev); +extern const struct can_bittiming_const kvaser_usb_flexc_bittiming_const; + #endif /* KVASER_USB_H */ diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c index dcee8dc828ecc1..fce3f069cdbc0a 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c @@ -373,7 +373,7 @@ static const struct can_bittiming_const kvaser_usb_hydra_kcan_bittiming_c = { .brp_inc = 1, }; -static const struct can_bittiming_const kvaser_usb_hydra_flexc_bittiming_c = { +const struct can_bittiming_const kvaser_usb_flexc_bittiming_const = { .name = "kvaser_usb_flex", .tseg1_min = 4, .tseg1_max = 16, @@ -2052,7 +2052,7 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_flexc = { .freq = 24000000, }, .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_hydra_flexc_bittiming_c, + .bittiming_const = &kvaser_usb_flexc_bittiming_const, }; static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_rt = { diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index d94101c4650804..b9c2231e4b43e6 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -100,16 +100,6 @@ #define USBCAN_ERROR_STATE_RX_ERROR BIT(1) #define USBCAN_ERROR_STATE_BUSERROR BIT(2) -/* bittiming parameters */ -#define KVASER_USB_TSEG1_MIN 1 -#define KVASER_USB_TSEG1_MAX 16 -#define KVASER_USB_TSEG2_MIN 1 -#define KVASER_USB_TSEG2_MAX 8 -#define KVASER_USB_SJW_MAX 4 -#define KVASER_USB_BRP_MIN 1 -#define KVASER_USB_BRP_MAX 64 -#define KVASER_USB_BRP_INC 1 - /* ctrl modes */ #define KVASER_CTRL_MODE_NORMAL 1 #define KVASER_CTRL_MODE_SILENT 2 @@ -342,48 +332,68 @@ struct kvaser_usb_err_summary { }; }; -static const struct can_bittiming_const kvaser_usb_leaf_bittiming_const = { - .name = "kvaser_usb", - .tseg1_min = KVASER_USB_TSEG1_MIN, - .tseg1_max = KVASER_USB_TSEG1_MAX, - .tseg2_min = KVASER_USB_TSEG2_MIN, - .tseg2_max = KVASER_USB_TSEG2_MAX, - .sjw_max = KVASER_USB_SJW_MAX, - .brp_min = KVASER_USB_BRP_MIN, - .brp_max = KVASER_USB_BRP_MAX, - .brp_inc = KVASER_USB_BRP_INC, +static const struct can_bittiming_const kvaser_usb_leaf_m16c_bittiming_const = { + .name = "kvaser_usb_ucii", + .tseg1_min = 4, + .tseg1_max = 16, + .tseg2_min = 2, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 16, + .brp_inc = 1, +}; + +static const struct can_bittiming_const kvaser_usb_leaf_m32c_bittiming_const = { + .name = "kvaser_usb_leaf", + .tseg1_min = 3, + .tseg1_max = 16, + .tseg2_min = 2, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 2, + .brp_max = 128, + .brp_inc = 2, }; -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_8mhz = { +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_usbcan_dev_cfg = { .clock = { .freq = 8000000, }, .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_leaf_bittiming_const, + .bittiming_const = &kvaser_usb_leaf_m16c_bittiming_const, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_m32c_dev_cfg = { + .clock = { + .freq = 16000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_m32c_bittiming_const, }; -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_16mhz = { +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_16mhz = { .clock = { .freq = 16000000, }, .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_leaf_bittiming_const, + .bittiming_const = &kvaser_usb_flexc_bittiming_const, }; -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_24mhz = { +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_24mhz = { .clock = { .freq = 24000000, }, .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_leaf_bittiming_const, + .bittiming_const = &kvaser_usb_flexc_bittiming_const, }; -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_32mhz = { +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_32mhz = { .clock = { .freq = 32000000, }, .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_leaf_bittiming_const, + .bittiming_const = &kvaser_usb_flexc_bittiming_const, }; static void * @@ -529,17 +539,17 @@ static void kvaser_usb_leaf_get_software_info_leaf(struct kvaser_usb *dev, /* Firmware expects bittiming parameters calculated for 16MHz * clock, regardless of the actual clock */ - dev->cfg = &kvaser_usb_leaf_dev_cfg_16mhz; + dev->cfg = &kvaser_usb_leaf_m32c_dev_cfg; } else { switch (sw_options & KVASER_USB_LEAF_SWOPTION_FREQ_MASK) { case KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK: - dev->cfg = &kvaser_usb_leaf_dev_cfg_16mhz; + dev->cfg = &kvaser_usb_leaf_imx_dev_cfg_16mhz; break; case KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK: - dev->cfg = &kvaser_usb_leaf_dev_cfg_24mhz; + dev->cfg = &kvaser_usb_leaf_imx_dev_cfg_24mhz; break; case KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK: - dev->cfg = &kvaser_usb_leaf_dev_cfg_32mhz; + dev->cfg = &kvaser_usb_leaf_imx_dev_cfg_32mhz; break; } } @@ -566,7 +576,7 @@ static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev) dev->fw_version = le32_to_cpu(cmd.u.usbcan.softinfo.fw_version); dev->max_tx_urbs = le16_to_cpu(cmd.u.usbcan.softinfo.max_outstanding_tx); - dev->cfg = &kvaser_usb_leaf_dev_cfg_8mhz; + dev->cfg = &kvaser_usb_leaf_usbcan_dev_cfg; break; } From 83d3449e8ae56db4829b8d716c7843f3ed1f5f6e Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 7 Jul 2022 15:58:35 -0700 Subject: [PATCH 0431/1056] xfs: remove incorrect ASSERT in xfs_rename commit e445976537ad139162980bee015b7364e5b64fff upstream. This ASSERT in xfs_rename is a) incorrect, because (RENAME_WHITEOUT|RENAME_NOREPLACE) is a valid combination, and b) unnecessary, because actual invalid flag combinations are already handled at the vfs level in do_renameat2() before we get called. So, remove it. Reported-by: Paolo Bonzini Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Fixes: 7dcf5c3e4527 ("xfs: add RENAME_WHITEOUT support") Reported-by: Ayushman Dutta Signed-off-by: Kuniyuki Iwashima Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_inode.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2477e301fa8284..c19f3ca605af6a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3128,7 +3128,6 @@ xfs_rename( * appropriately. */ if (flags & RENAME_WHITEOUT) { - ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE))); error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip); if (error) return error; From f88e79727fba1430625f794cabd46e1dc9c3837c Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 18 Apr 2022 17:43:39 +0800 Subject: [PATCH 0432/1056] Revert "serial: sc16is7xx: Clear RS485 bits in the shutdown" commit 41c606879f89623dd5269eaffea640b915e9e17c upstream. This reverts commit 927728a34f11b5a27f4610bdb7068317d6fdc72a. Once the uart_port->rs485->flag is set to SER_RS485_ENABLED, the port should always work in RS485 mode. If users want the port to leave RS485 mode, they need to call ioctl() to clear SER_RS485_ENABLED. So here we shouldn't clear the RS485 bits in the shutdown(). Fixes: 927728a34f11 ("serial: sc16is7xx: Clear RS485 bits in the shutdown") Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20220418094339.678144-1-hui.wang@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index e98aa7b97cc5eb..0ab788058fa2af 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1055,12 +1055,10 @@ static void sc16is7xx_shutdown(struct uart_port *port) /* Disable all interrupts */ sc16is7xx_port_write(port, SC16IS7XX_IER_REG, 0); - /* Disable TX/RX, clear auto RS485 and RTS invert */ + /* Disable TX/RX */ sc16is7xx_port_update(port, SC16IS7XX_EFCR_REG, SC16IS7XX_EFCR_RXDISABLE_BIT | - SC16IS7XX_EFCR_TXDISABLE_BIT | - SC16IS7XX_EFCR_AUTO_RS485_BIT | - SC16IS7XX_EFCR_RTS_INVERT_BIT, + SC16IS7XX_EFCR_TXDISABLE_BIT, SC16IS7XX_EFCR_RXDISABLE_BIT | SC16IS7XX_EFCR_TXDISABLE_BIT); From d35b78cb053a4ac5243a13ccd99101e925823a81 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 16 Nov 2021 14:50:25 +0300 Subject: [PATCH 0433/1056] btrfs: fix error pointer dereference in btrfs_ioctl_rm_dev_v2() commit d815b3f2f273537cb8afaf5ab11a46851f6c03e5 upstream. If memdup_user() fails the error handing will crash when it tries to kfree() an error pointer. Just return directly because there is no cleanup required. Fixes: 1a15eb724aae ("btrfs: use btrfs_get_dev_args_from_path in dev removal ioctls") Reviewed-by: Josef Bacik Signed-off-by: Dan Carpenter Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4317720a29e855..a70505296dad39 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3231,10 +3231,8 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) return -EPERM; vol_args = memdup_user(arg, sizeof(*vol_args)); - if (IS_ERR(vol_args)) { - ret = PTR_ERR(vol_args); - goto out; - } + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); if (vol_args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) { ret = -EOPNOTSUPP; From cb91c0548ff2b32b591af04c24a8f6e0cf229ab3 Mon Sep 17 00:00:00 2001 From: Ye Guojin Date: Wed, 17 Nov 2021 06:39:55 +0000 Subject: [PATCH 0434/1056] virtio-blk: modify the value type of num in virtio_queue_rq() commit 0466a39bd0b6c462338f10d18076703d14a552de upstream. This was found by coccicheck: ./drivers/block/virtio_blk.c, 334, 14-17, WARNING Unsigned expression compared with zero num < 0 Reported-by: Zeal Robot Signed-off-by: Ye Guojin Link: https://lore.kernel.org/r/20211117063955.160777-1-ye.guojin@zte.com.cn Signed-off-by: Michael S. Tsirkin Fixes: 02746e26c39e ("virtio-blk: avoid preallocating big SGL for data") Reviewed-by: Stefano Garzarella Reviewed-by: Max Gurtovoy Reviewed-by: Stefan Hajnoczi Signed-off-by: Greg Kroah-Hartman --- drivers/block/virtio_blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index ccc5770d7654ed..d2ba849bb8d195 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -308,7 +308,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req = bd->rq; struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); unsigned long flags; - unsigned int num; + int num; int qid = hctx->queue_num; int err; bool notify = false; From 3b9f491386698a7e39680cb3c375e62d0cae457d Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 21 Jan 2022 05:45:22 -0800 Subject: [PATCH 0435/1056] btrfs: fix use of uninitialized variable at rm device ioctl commit 37b4599547e324589e011c20f74b021d6d25cb7f upstream. Clang static analysis reports this problem ioctl.c:3333:8: warning: 3rd function call argument is an uninitialized value ret = exclop_start_or_cancel_reloc(fs_info, cancel is only set in one branch of an if-check and is always used. So initialize to false. Fixes: 1a15eb724aae ("btrfs: use btrfs_get_dev_args_from_path in dev removal ioctls") Reviewed-by: Filipe Manana Reviewed-by: Anand Jain Signed-off-by: Tom Rix Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a70505296dad39..b9dcaae7c8d51f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3291,7 +3291,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) struct block_device *bdev = NULL; fmode_t mode; int ret; - bool cancel; + bool cancel = false; if (!capable(CAP_SYS_ADMIN)) return -EPERM; From 9c26be2c3e699ad4925d4bdf944be669643a619c Mon Sep 17 00:00:00 2001 From: "daniel.starke@siemens.com" Date: Thu, 17 Feb 2022 23:31:18 -0800 Subject: [PATCH 0436/1056] tty: n_gsm: fix encoding of command/response bit commit 57435c42400ec147a527b2313188b649e81e449e upstream. n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to the newer 27.010 here. Chapter 5.2.1.2 describes the encoding of the C/R (command/response) bit. Table 1 shows that the actual encoding of the C/R bit is inverted if the associated frame is sent by the responder. The referenced commit fixed here further broke the internal meaning of this bit in the outgoing path by always setting the C/R bit regardless of the frame type. This patch fixes both by setting the C/R bit always consistently for command (1) and response (0) frames and inverting it later for the responder where necessary. The meaning of this bit in the debug output is being preserved and shows the bit as if it was encoded by the initiator. This reflects only the frame type rather than the encoded combination of communication side and frame type. Fixes: cc0f42122a7e ("tty: n_gsm: Modify CR,PF bit when config requester") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220218073123.2121-2-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 4a430f6ca17062..4946a241e323f7 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -471,7 +471,7 @@ static void gsm_hex_dump_bytes(const char *fname, const u8 *data, * gsm_print_packet - display a frame for debug * @hdr: header to print before decode * @addr: address EA from the frame - * @cr: C/R bit from the frame + * @cr: C/R bit seen as initiator * @control: control including PF bit * @data: following data bytes * @dlen: length of data @@ -571,7 +571,7 @@ static int gsm_stuff_frame(const u8 *input, u8 *output, int len) * gsm_send - send a control frame * @gsm: our GSM mux * @addr: address for control frame - * @cr: command/response bit + * @cr: command/response bit seen as initiator * @control: control byte including PF bit * * Format up and transmit a control frame. These do not go via the @@ -586,11 +586,15 @@ static void gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) int len; u8 cbuf[10]; u8 ibuf[3]; + int ocr; + + /* toggle C/R coding if not initiator */ + ocr = cr ^ (gsm->initiator ? 0 : 1); switch (gsm->encoding) { case 0: cbuf[0] = GSM0_SOF; - cbuf[1] = (addr << 2) | (cr << 1) | EA; + cbuf[1] = (addr << 2) | (ocr << 1) | EA; cbuf[2] = control; cbuf[3] = EA; /* Length of data = 0 */ cbuf[4] = 0xFF - gsm_fcs_add_block(INIT_FCS, cbuf + 1, 3); @@ -600,7 +604,7 @@ static void gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) case 1: case 2: /* Control frame + packing (but not frame stuffing) in mode 1 */ - ibuf[0] = (addr << 2) | (cr << 1) | EA; + ibuf[0] = (addr << 2) | (ocr << 1) | EA; ibuf[1] = control; ibuf[2] = 0xFF - gsm_fcs_add_block(INIT_FCS, ibuf, 2); /* Stuffing may double the size worst case */ @@ -630,7 +634,7 @@ static void gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) static inline void gsm_response(struct gsm_mux *gsm, int addr, int control) { - gsm_send(gsm, addr, 1, control); + gsm_send(gsm, addr, 0, control); } /** @@ -1875,10 +1879,10 @@ static void gsm_queue(struct gsm_mux *gsm) goto invalid; cr = gsm->address & 1; /* C/R bit */ + cr ^= gsm->initiator ? 0 : 1; /* Flip so 1 always means command */ gsm_print_packet("<--", address, cr, gsm->control, gsm->buf, gsm->len); - cr ^= 1 - gsm->initiator; /* Flip so 1 always means command */ dlci = gsm->dlci[address]; switch (gsm->control) { From 7208101ded1e9dcc52c8f0f8b16474211c871c1a Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 12 May 2022 06:16:10 +0400 Subject: [PATCH 0437/1056] ARM: meson: Fix refcount leak in meson_smp_prepare_cpus [ Upstream commit 34d2cd3fccced12b958b8848e3eff0ee4296764c ] of_find_compatible_node() returns a node pointer with refcount incremented, we should use of_node_put() on it when done. Add missing of_node_put() to avoid refcount leak. Fixes: d850f3e5d296 ("ARM: meson: Add SMP bringup code for Meson8 and Meson8b") Signed-off-by: Miaoqian Lin Reviewed-by: Martin Blumenstingl Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20220512021611.47921-1-linmq006@gmail.com Signed-off-by: Sasha Levin --- arch/arm/mach-meson/platsmp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-meson/platsmp.c b/arch/arm/mach-meson/platsmp.c index 4b8ad728bb42aa..32ac60b89fdcc5 100644 --- a/arch/arm/mach-meson/platsmp.c +++ b/arch/arm/mach-meson/platsmp.c @@ -71,6 +71,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible, } sram_base = of_iomap(node, 0); + of_node_put(node); if (!sram_base) { pr_err("Couldn't map SRAM registers\n"); return; @@ -91,6 +92,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible, } scu_base = of_iomap(node, 0); + of_node_put(node); if (!scu_base) { pr_err("Couldn't map SCU registers\n"); return; From 29029ca6eed728c82a87be896ab893d5f6a58847 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 25 May 2022 21:49:56 -0500 Subject: [PATCH 0438/1056] pinctrl: sunxi: a83t: Fix NAND function name for some pins [ Upstream commit aaefa29270d9551b604165a08406543efa9d16f5 ] The other NAND pins on Port C use the "nand0" function name. "nand0" also matches all of the other Allwinner SoCs. Fixes: 4730f33f0d82 ("pinctrl: sunxi: add allwinner A83T PIO controller support") Signed-off-by: Samuel Holland Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20220526024956.49500-1-samuel@sholland.org Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c index 4ada80317a3bd5..b5c1a8f363f32e 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c @@ -158,26 +158,26 @@ static const struct sunxi_desc_pin sun8i_a83t_pins[] = { SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 14), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "nand"), /* DQ6 */ + SUNXI_FUNCTION(0x2, "nand0"), /* DQ6 */ SUNXI_FUNCTION(0x3, "mmc2")), /* D6 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 15), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "nand"), /* DQ7 */ + SUNXI_FUNCTION(0x2, "nand0"), /* DQ7 */ SUNXI_FUNCTION(0x3, "mmc2")), /* D7 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 16), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "nand"), /* DQS */ + SUNXI_FUNCTION(0x2, "nand0"), /* DQS */ SUNXI_FUNCTION(0x3, "mmc2")), /* RST */ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 17), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "nand")), /* CE2 */ + SUNXI_FUNCTION(0x2, "nand0")), /* CE2 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 18), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "nand")), /* CE3 */ + SUNXI_FUNCTION(0x2, "nand0")), /* CE3 */ /* Hole */ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 2), SUNXI_FUNCTION(0x0, "gpio_in"), From 25e61636a5c3b4b45ca4007a1d2938299e4b5f2f Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 4 May 2022 18:08:57 +0100 Subject: [PATCH 0439/1056] ASoC: rt711: Add endianness flag in snd_soc_component_driver [ Upstream commit 33f06beac3ade10834a82ad4105dcd91d4b00d61 ] The endianness flag is used on the CODEC side to specify an ambivalence to endian, typically because it is lost over the hardware link. This device receives audio over a SoundWire DAI and as such should have endianness applied. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220504170905.332415-31-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt711.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c index a7c5608a0ef879..9cc7283a19c223 100644 --- a/sound/soc/codecs/rt711.c +++ b/sound/soc/codecs/rt711.c @@ -943,6 +943,7 @@ static const struct snd_soc_component_driver soc_codec_dev_rt711 = { .num_dapm_routes = ARRAY_SIZE(rt711_audio_map), .set_jack = rt711_set_jack_detect, .remove = rt711_remove, + .endianness = 1, }; static int rt711_set_sdw_stream(struct snd_soc_dai *dai, void *sdw_stream, From ac80a45ddb628003b8b18e5e8c2fc1d08d3e2961 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 4 May 2022 18:08:58 +0100 Subject: [PATCH 0440/1056] ASoC: rt711-sdca: Add endianness flag in snd_soc_component_driver [ Upstream commit 3e50a5001055d79c04ea1c79fe4b4ff937a3339c ] The endianness flag is used on the CODEC side to specify an ambivalence to endian, typically because it is lost over the hardware link. This device receives audio over a SoundWire DAI and as such should have endianness applied. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220504170905.332415-32-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt711-sdca.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c index 2e992589f1e420..b168e9303ea911 100644 --- a/sound/soc/codecs/rt711-sdca.c +++ b/sound/soc/codecs/rt711-sdca.c @@ -1208,6 +1208,7 @@ static const struct snd_soc_component_driver soc_sdca_dev_rt711 = { .num_dapm_routes = ARRAY_SIZE(rt711_sdca_audio_map), .set_jack = rt711_sdca_set_jack_detect, .remove = rt711_sdca_remove, + .endianness = 1, }; static int rt711_sdca_set_sdw_stream(struct snd_soc_dai *dai, void *sdw_stream, From 4157343a6a1a5f344436967cad1cc048b33a991e Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 6 Jun 2022 15:37:52 -0500 Subject: [PATCH 0441/1056] ASoC: codecs: rt700/rt711/rt711-sdca: resume bus/codec in .set_jack_detect [ Upstream commit 40737057b48f1b4db67b0d766b95c87ba8fc5e03 ] The .set_jack_detect() codec component callback is invoked during card registration, which happens when the machine driver is probed. The issue is that this callback can race with the bus suspend/resume, and IO timeouts can happen. This can be reproduced very easily if the machine driver is 'blacklisted' and manually probed after the bus suspends. The bus and codec need to be re-initialized using pm_runtime helpers. Previous contributions tried to make sure accesses to the bus during the .set_jack_detect() component callback only happen when the bus is active. This was done by changing the regcache status on a component remove. This is however a layering violation, the regcache status should only be modified on device probe, suspend and resume. The component probe/remove should not modify how the device regcache is handled. This solution also didn't handle all the possible race conditions, and the RT700 headset codec was not handled. This patch tries to resume the codec device before handling the jack initializations. In case the codec has not yet been initialized, pm_runtime may not be enabled yet, so we don't squelch the -EACCES error code and only stop the jack information. When the codec reports as attached, the jack initialization will proceed as usual. BugLink: https://github.com/thesofproject/linux/issues/3643 Fixes: 7ad4d237e7c4a ('ASoC: rt711-sdca: Add RT711 SDCA vendor-specific driver') Fixes: 899b12542b089 ('ASoC: rt711: add snd_soc_component remove callback') Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20220606203752.144159-8-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt700.c | 16 +++++++++++++--- sound/soc/codecs/rt711-sdca.c | 26 ++++++++++++++------------ sound/soc/codecs/rt711.c | 24 +++++++++++++----------- 3 files changed, 40 insertions(+), 26 deletions(-) diff --git a/sound/soc/codecs/rt700.c b/sound/soc/codecs/rt700.c index 921382724f9cdb..614a40247679b2 100644 --- a/sound/soc/codecs/rt700.c +++ b/sound/soc/codecs/rt700.c @@ -315,17 +315,27 @@ static int rt700_set_jack_detect(struct snd_soc_component *component, struct snd_soc_jack *hs_jack, void *data) { struct rt700_priv *rt700 = snd_soc_component_get_drvdata(component); + int ret; rt700->hs_jack = hs_jack; - if (!rt700->hw_init) { - dev_dbg(&rt700->slave->dev, - "%s hw_init not ready yet\n", __func__); + ret = pm_runtime_resume_and_get(component->dev); + if (ret < 0) { + if (ret != -EACCES) { + dev_err(component->dev, "%s: failed to resume %d\n", __func__, ret); + return ret; + } + + /* pm_runtime not enabled yet */ + dev_dbg(component->dev, "%s: skipping jack init for now\n", __func__); return 0; } rt700_jack_init(rt700); + pm_runtime_mark_last_busy(component->dev); + pm_runtime_put_autosuspend(component->dev); + return 0; } diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c index b168e9303ea911..c15fb98eac86d3 100644 --- a/sound/soc/codecs/rt711-sdca.c +++ b/sound/soc/codecs/rt711-sdca.c @@ -487,16 +487,27 @@ static int rt711_sdca_set_jack_detect(struct snd_soc_component *component, struct snd_soc_jack *hs_jack, void *data) { struct rt711_sdca_priv *rt711 = snd_soc_component_get_drvdata(component); + int ret; rt711->hs_jack = hs_jack; - if (!rt711->hw_init) { - dev_dbg(&rt711->slave->dev, - "%s hw_init not ready yet\n", __func__); + ret = pm_runtime_resume_and_get(component->dev); + if (ret < 0) { + if (ret != -EACCES) { + dev_err(component->dev, "%s: failed to resume %d\n", __func__, ret); + return ret; + } + + /* pm_runtime not enabled yet */ + dev_dbg(component->dev, "%s: skipping jack init for now\n", __func__); return 0; } rt711_sdca_jack_init(rt711); + + pm_runtime_mark_last_busy(component->dev); + pm_runtime_put_autosuspend(component->dev); + return 0; } @@ -1190,14 +1201,6 @@ static int rt711_sdca_probe(struct snd_soc_component *component) return 0; } -static void rt711_sdca_remove(struct snd_soc_component *component) -{ - struct rt711_sdca_priv *rt711 = snd_soc_component_get_drvdata(component); - - regcache_cache_only(rt711->regmap, true); - regcache_cache_only(rt711->mbq_regmap, true); -} - static const struct snd_soc_component_driver soc_sdca_dev_rt711 = { .probe = rt711_sdca_probe, .controls = rt711_sdca_snd_controls, @@ -1207,7 +1210,6 @@ static const struct snd_soc_component_driver soc_sdca_dev_rt711 = { .dapm_routes = rt711_sdca_audio_map, .num_dapm_routes = ARRAY_SIZE(rt711_sdca_audio_map), .set_jack = rt711_sdca_set_jack_detect, - .remove = rt711_sdca_remove, .endianness = 1, }; diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c index 9cc7283a19c223..fafb0ba8349fd6 100644 --- a/sound/soc/codecs/rt711.c +++ b/sound/soc/codecs/rt711.c @@ -450,17 +450,27 @@ static int rt711_set_jack_detect(struct snd_soc_component *component, struct snd_soc_jack *hs_jack, void *data) { struct rt711_priv *rt711 = snd_soc_component_get_drvdata(component); + int ret; rt711->hs_jack = hs_jack; - if (!rt711->hw_init) { - dev_dbg(&rt711->slave->dev, - "%s hw_init not ready yet\n", __func__); + ret = pm_runtime_resume_and_get(component->dev); + if (ret < 0) { + if (ret != -EACCES) { + dev_err(component->dev, "%s: failed to resume %d\n", __func__, ret); + return ret; + } + + /* pm_runtime not enabled yet */ + dev_dbg(component->dev, "%s: skipping jack init for now\n", __func__); return 0; } rt711_jack_init(rt711); + pm_runtime_mark_last_busy(component->dev); + pm_runtime_put_autosuspend(component->dev); + return 0; } @@ -925,13 +935,6 @@ static int rt711_probe(struct snd_soc_component *component) return 0; } -static void rt711_remove(struct snd_soc_component *component) -{ - struct rt711_priv *rt711 = snd_soc_component_get_drvdata(component); - - regcache_cache_only(rt711->regmap, true); -} - static const struct snd_soc_component_driver soc_codec_dev_rt711 = { .probe = rt711_probe, .set_bias_level = rt711_set_bias_level, @@ -942,7 +945,6 @@ static const struct snd_soc_component_driver soc_codec_dev_rt711 = { .dapm_routes = rt711_audio_map, .num_dapm_routes = ARRAY_SIZE(rt711_audio_map), .set_jack = rt711_set_jack_detect, - .remove = rt711_remove, .endianness = 1, }; From 216094007699c8eddb20f06dde98374138da9de7 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sun, 1 May 2022 20:40:16 +0200 Subject: [PATCH 0442/1056] arm64: dts: qcom: msm8994: Fix CPU6/7 reg values [ Upstream commit 47bf59c4755930f616dd90c8c6a85f40a6d347ea ] CPU6 and CPU7 were mistakengly pointing to CPU5 reg. Fix it. Fixes: 02d8091bbca0 ("arm64: dts: qcom: msm8994: Add a proper CPU map") Signed-off-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220501184016.64138-1-konrad.dybcio@somainline.org Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8994.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index 3c27671c8b5c31..a8dc8163ee82da 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -93,7 +93,7 @@ CPU6: cpu@102 { device_type = "cpu"; compatible = "arm,cortex-a57"; - reg = <0x0 0x101>; + reg = <0x0 0x102>; enable-method = "psci"; next-level-cache = <&L2_1>; }; @@ -101,7 +101,7 @@ CPU7: cpu@103 { device_type = "cpu"; compatible = "arm,cortex-a57"; - reg = <0x0 0x101>; + reg = <0x0 0x103>; enable-method = "psci"; next-level-cache = <&L2_1>; }; From 89a718d1d080c3e0a0da6b6e3613f166bed5561a Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 31 May 2022 15:47:35 +0300 Subject: [PATCH 0443/1056] arm64: dts: qcom: sdm845: use dispcc AHB clock for mdss node [ Upstream commit 3ba500dee327e0261e728edec8a4f2f563d2760c ] It was noticed that on sdm845 after an MDSS suspend/resume cycle the driver can not read HW_REV registers properly (they will return 0 instead). Chaning the "iface" clock from <&gcc GCC_DISP_AHB_CLK> to <&dispcc DISP_CC_MDSS_AHB_CLK> fixes the issue. Fixes: 08c2a076d18f ("arm64: dts: qcom: sdm845: Add dpu to sdm845 dts file") Signed-off-by: Dmitry Baryshkov Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220531124735.1165582-1-dmitry.baryshkov@linaro.org Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sdm845.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index d20eacfc10176a..ea7a272d267a7c 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -4147,7 +4147,7 @@ power-domains = <&dispcc MDSS_GDSC>; - clocks = <&gcc GCC_DISP_AHB_CLK>, + clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, <&dispcc DISP_CC_MDSS_MDP_CLK>; clock-names = "iface", "core"; From ee1ced3dd8639c4ada74117b37906e1e1df8d23a Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 21 Jun 2022 13:45:05 -0300 Subject: [PATCH 0444/1056] ARM: mxs_defconfig: Enable the framebuffer [ Upstream commit b10ef5f2ddb3a5a22ac0936c8d91a50ac5e55e77 ] Currently, when booting Linux on a imx28-evk board there is no display activity. Enable CONFIG_FB which is nowadays required for CONFIG_DRM_PANEL_LVDS, CONFIG_DRM_PANEL_SIMPLE, CONFIG_DRM_PANEL_SEIKO_43WVF1G, CONFIG_FB_MODE_HELPERS, CONFIG_BACKLIGHT_PWM, CONFIG_BACKLIGHT_GPIO, CONFIG_FRAMEBUFFER_CONSOLE, CONFIG_LOGO, CONFIG_FONTS, CONFIG_FONT_8x8 and CONFIG_FONT_8x16. Based on commit c54467482ffd ("ARM: imx_v6_v7_defconfig: enable fb"). Fixes: f611b1e7624c ("drm: Avoid circular dependencies for CONFIG_FB") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/configs/mxs_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig index ca32446b187f5d..f53086ddc48b03 100644 --- a/arch/arm/configs/mxs_defconfig +++ b/arch/arm/configs/mxs_defconfig @@ -93,6 +93,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_DRM=y CONFIG_DRM_PANEL_SEIKO_43WVF1G=y CONFIG_DRM_MXSFB=y +CONFIG_FB=y CONFIG_FB_MODE_HELPERS=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_BACKLIGHT_CLASS_DEVICE=y From 401d27fec6140194551f1b0c86015a00273fe4ba Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 22 Jun 2022 14:13:57 +0800 Subject: [PATCH 0445/1056] arm64: dts: imx8mp-evk: correct mmc pad settings [ Upstream commit 01785f1f156511c4f285786b4192245d4f476bf1 ] According to RM bit layout, BIT3 and BIT0 are reserved. 8 7 6 5 4 3 2 1 0 PE HYS PUE ODE FSEL X DSE X Not set reserved bit. Fixes: 9e847693c6f3 ("arm64: dts: freescale: Add i.MX8MP EVK board support") Signed-off-by: Peng Fan Reviewed-by: Rasmus Villemoes Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mp-evk.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index 7b99fad6e4d6ec..3c4369d6468ce3 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -377,7 +377,7 @@ pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp { fsl,pins = < - MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x41 + MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x40 >; }; @@ -402,7 +402,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d0 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d0 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d0 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -414,7 +414,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d4 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d4 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d4 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -426,7 +426,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d6 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d6 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d6 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; From 637b3dab51f7efca293347ef16f124f83412603e Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Wed, 22 Jun 2022 14:13:58 +0800 Subject: [PATCH 0446/1056] arm64: dts: imx8mp-evk: correct the uart2 pinctl value [ Upstream commit 2d4fb72b681205eed4553d8802632bd3270be3ba ] According to the IOMUXC_SW_PAD_CTL_PAD_UART2_RXD/TXD register define in imx8mp RM, bit0 and bit3 are reserved, and the uart2 rx/tx pin should enable the pull up, so need to set bit8 to 1. The original pinctl value 0x49 is incorrect and needs to be changed to 0x140, same as uart1 and uart3. Fixes: 9e847693c6f3 ("arm64: dts: freescale: Add i.MX8MP EVK board support") Reviewed-by: Haibo Chen Signed-off-by: Sherry Sun Signed-off-by: Peng Fan Reviewed-by: Rasmus Villemoes Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mp-evk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index 3c4369d6468ce3..3f424a8937f12e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -383,8 +383,8 @@ pinctrl_uart2: uart2grp { fsl,pins = < - MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x49 - MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x49 + MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x140 + MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x140 >; }; From f1571c8c8724cd7d1b1ff1826ba484c831afed73 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 22 Jun 2022 14:13:59 +0800 Subject: [PATCH 0447/1056] arm64: dts: imx8mp-evk: correct gpio-led pad settings [ Upstream commit b838582ab8d5fb11b2c0275056a9f34e1d94fece ] 0x19 is not a valid setting. According to RM bit layout, BIT3 and BIT0 are reserved. 8 7 6 5 4 3 2 1 0 PE HYS PUE ODE FSEL X DSE X Correct setting with PE PUE set, DSE set to 0. Fixes: 50d336b12f34 ("arm64: dts: imx8mp-evk: Add GPIO LED support") Signed-off-by: Peng Fan Reviewed-by: Rasmus Villemoes Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mp-evk.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index 3f424a8937f12e..be3b01b5e1590d 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -351,7 +351,7 @@ pinctrl_gpio_led: gpioledgrp { fsl,pins = < - MX8MP_IOMUXC_NAND_READY_B__GPIO3_IO16 0x19 + MX8MP_IOMUXC_NAND_READY_B__GPIO3_IO16 0x140 >; }; From ebad4d73ab1c695f4846ce8413e0a66c0280ec06 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 22 Jun 2022 14:14:00 +0800 Subject: [PATCH 0448/1056] arm64: dts: imx8mp-evk: correct vbus pad settings [ Upstream commit e2c00820a99c55c9bb40642d5818a904a1e0d664 ] 0x19 is not a valid setting. According to RM bit layout, BIT3 and BIT0 are reserved. 8 7 6 5 4 3 2 1 0 PE HYS PUE ODE FSEL X DSE X Not set reserved bit. Fixes: 43da4f92a611 ("arm64: dts: imx8mp-evk: enable usb1 as host mode") Signed-off-by: Peng Fan Reviewed-by: Rasmus Villemoes Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mp-evk.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index be3b01b5e1590d..c794e8662da8ee 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -390,7 +390,7 @@ pinctrl_usb1_vbus: usb1grp { fsl,pins = < - MX8MP_IOMUXC_GPIO1_IO14__USB2_OTG_PWR 0x19 + MX8MP_IOMUXC_GPIO1_IO14__USB2_OTG_PWR 0x10 >; }; From 37413a0ea0900c45524623e34bea2f541e3bcd59 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 22 Jun 2022 14:14:01 +0800 Subject: [PATCH 0449/1056] arm64: dts: imx8mp-evk: correct eqos pad settings [ Upstream commit e6e1bc0ec9e8ad212fa46d8878a6e17cd31fdf7b ] According to RM bit layout, BIT3 and BIT0 are reserved. 8 7 6 5 4 3 2 1 0 PE HYS PUE ODE FSEL X DSE X Although function is not broken, we should not set reserved bit. Fixes: dc6d5dc89bad ("arm64: dts: imx8mp-evk: enable EQOS ethernet") Signed-off-by: Peng Fan Reviewed-by: Rasmus Villemoes Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mp-evk.dts | 30 ++++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index c794e8662da8ee..33664c21767345 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -285,21 +285,21 @@ &iomuxc { pinctrl_eqos: eqosgrp { fsl,pins = < - MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3 - MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3 - MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91 - MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91 - MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91 - MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91 - MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91 - MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91 - MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f - MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f - MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f - MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f - MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f - MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f - MX8MP_IOMUXC_SAI2_RXC__GPIO4_IO22 0x19 + MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2 + MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2 + MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90 + MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90 + MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90 + MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90 + MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90 + MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90 + MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16 + MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16 + MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16 + MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16 + MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16 + MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16 + MX8MP_IOMUXC_SAI2_RXC__GPIO4_IO22 0x10 >; }; From b34da817e3fa2075030054fb24bce46b48d74906 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 22 Jun 2022 14:14:04 +0800 Subject: [PATCH 0450/1056] arm64: dts: imx8mp-evk: correct I2C1 pad settings [ Upstream commit 05a7f43478e890513d571f36660bfedc1482a588 ] According to RM bit layout, BIT3 and BIT0 are reserved. 8 7 6 5 4 3 2 1 0 PE HYS PUE ODE FSEL X DSE X Although function is not broken, we should not set reserved bit. Fixes: 5497bc2a2bff ("arm64: dts: imx8mp-evk: Add PMIC device") Signed-off-by: Peng Fan Reviewed-by: Rasmus Villemoes Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mp-evk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index 33664c21767345..de6f3297fea45c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -357,8 +357,8 @@ pinctrl_i2c1: i2c1grp { fsl,pins = < - MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c3 - MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c3 + MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c2 + MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c2 >; }; From 67a21eb8c48e106d145ad0784192973690e65170 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 22 Jun 2022 14:14:05 +0800 Subject: [PATCH 0451/1056] arm64: dts: imx8mp-evk: correct I2C3 pad settings [ Upstream commit 0836de513ebaae5f03014641eac996290d67493d ] According to RM bit layout, BIT3 and BIT0 are reserved. 8 7 6 5 4 3 2 1 0 PE HYS PUE ODE FSEL X DSE X Although function is not broken, we should not set reserved bit. Fixes: 5e4a67ff7f69 ("arm64: dts: imx8mp-evk: Add i2c3 support") Signed-off-by: Peng Fan Reviewed-by: Rasmus Villemoes Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mp-evk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index de6f3297fea45c..5c9fb39dd99e52 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -364,8 +364,8 @@ pinctrl_i2c3: i2c3grp { fsl,pins = < - MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c3 - MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c3 + MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c2 + MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c2 >; }; From ea8dbe870c848246cea670b9fa9173bbecb85ce1 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 22 Jun 2022 14:14:07 +0800 Subject: [PATCH 0452/1056] arm64: dts: imx8mp-phyboard-pollux-rdk: correct uart pad settings [ Upstream commit e266c155bd88e95f9b86379d6b0add6ac6e5452e ] BIT3 and BIT0 are reserved bits, should not touch. Fixes: 846f752866bd ("arm64: dts: imx8mp-phyboard-pollux-rdk: Change debug UART") Signed-off-by: Peng Fan Reviewed-by: Rasmus Villemoes Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts index 984a6b9ded8d7a..e340769548972f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts @@ -156,8 +156,8 @@ pinctrl_uart1: uart1grp { fsl,pins = < - MX8MP_IOMUXC_UART1_RXD__UART1_DCE_RX 0x49 - MX8MP_IOMUXC_UART1_TXD__UART1_DCE_TX 0x49 + MX8MP_IOMUXC_UART1_RXD__UART1_DCE_RX 0x40 + MX8MP_IOMUXC_UART1_TXD__UART1_DCE_TX 0x40 >; }; From 44826474a39ab417e8f56b0efb6ae84c23e54764 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 22 Jun 2022 14:14:08 +0800 Subject: [PATCH 0453/1056] arm64: dts: imx8mp-phyboard-pollux-rdk: correct eqos pad settings [ Upstream commit bae4de618efe1c41d34aa2e6cef8b08e46256667 ] BIT3 and BIT0 are reserved bits, should not touch. Fixes: 6f96852619d5 ("arm64: dts: freescale: Add support EQOS MAC on phyBOARD-Pollux-i.MX8MP") Signed-off-by: Peng Fan Reviewed-by: Rasmus Villemoes Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- .../freescale/imx8mp-phyboard-pollux-rdk.dts | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts index e340769548972f..cefd3d36f93f3f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts @@ -116,20 +116,20 @@ &iomuxc { pinctrl_eqos: eqosgrp { fsl,pins = < - MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3 - MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3 - MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91 - MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91 - MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91 - MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91 - MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91 - MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91 - MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f - MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f - MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f - MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f - MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f - MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f + MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2 + MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2 + MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90 + MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90 + MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90 + MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90 + MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90 + MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90 + MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16 + MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16 + MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16 + MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16 + MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16 + MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16 MX8MP_IOMUXC_SAI1_MCLK__GPIO4_IO20 0x10 >; }; From c041165d8f04c49212bc424f1ac047c065250cae Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 22 Jun 2022 14:14:09 +0800 Subject: [PATCH 0454/1056] arm64: dts: imx8mp-phyboard-pollux-rdk: correct i2c2 & mmc settings [ Upstream commit 242d8ee9111171a6e68249aaff62643c513be6ec ] BIT3 and BIT0 are reserved bits, should not touch. Fixes: 88f7f6bcca37 ("arm64: dts: freescale: Add support for phyBOARD-Pollux-i.MX8MP") Signed-off-by: Peng Fan Reviewed-by: Rasmus Villemoes Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- .../dts/freescale/imx8mp-phyboard-pollux-rdk.dts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts index cefd3d36f93f3f..6aa720bafe2898 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts @@ -136,21 +136,21 @@ pinctrl_i2c2: i2c2grp { fsl,pins = < - MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c3 - MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c3 + MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c2 + MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c2 >; }; pinctrl_i2c2_gpio: i2c2gpiogrp { fsl,pins = < - MX8MP_IOMUXC_I2C2_SCL__GPIO5_IO16 0x1e3 - MX8MP_IOMUXC_I2C2_SDA__GPIO5_IO17 0x1e3 + MX8MP_IOMUXC_I2C2_SCL__GPIO5_IO16 0x1e2 + MX8MP_IOMUXC_I2C2_SDA__GPIO5_IO17 0x1e2 >; }; pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp { fsl,pins = < - MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x41 + MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x40 >; }; @@ -175,7 +175,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d0 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d0 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d0 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -187,7 +187,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d4 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d4 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d4 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -199,7 +199,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d6 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d6 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d6 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; }; From 76292cf4b3bc9b89d49a21ef6d792b03362ea1b6 Mon Sep 17 00:00:00 2001 From: Andrei Lalaev Date: Wed, 25 May 2022 22:04:25 +0300 Subject: [PATCH 0455/1056] pinctrl: sunxi: sunxi_pconf_set: use correct offset [ Upstream commit cd4c1e65a32afd003b08ad4aafe1e4d3e4e8e61b ] Some Allwinner SoCs have 2 pinctrls (PIO and R_PIO). Previous implementation used absolute pin numbering and it was incorrect for R_PIO pinctrl. It's necessary to take into account the base pin number. Fixes: 90be64e27621 ("pinctrl: sunxi: implement pin_config_set") Signed-off-by: Andrei Lalaev Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20220525190423.410609-1-andrey.lalaev@gmail.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/sunxi/pinctrl-sunxi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index ce3f9ea415117b..1431ab21aca6f3 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -544,6 +544,8 @@ static int sunxi_pconf_set(struct pinctrl_dev *pctldev, unsigned pin, struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); int i; + pin -= pctl->desc->pin_base; + for (i = 0; i < num_configs; i++) { enum pin_config_param param; unsigned long flags; From ec5533b2ce265dc3c074736d1939ef0433087e93 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 27 Jun 2022 15:59:38 +0200 Subject: [PATCH 0456/1056] arm64: dts: qcom: msm8992-*: Fix vdd_lvs1_2-supply typo [ Upstream commit 5fb779558f1c97e2bf2794cb59553e569c38e2f9 ] "make dtbs_check" complains about the missing "-supply" suffix for vdd_lvs1_2 which is clearly a typo, originally introduced in the msm8994-smd-rpm.dtsi file and apparently later copied to msm8992-xiaomi-libra.dts: msm8992-lg-bullhead-rev-10/101.dtb: pm8994-regulators: 'vdd_lvs1_2' does not match any of the regexes: '.*-supply$', '^((s|l|lvs|5vs)[0-9]*)|(boost-bypass)|(bob)$', 'pinctrl-[0-9]+' From schema: regulator/qcom,smd-rpm-regulator.yaml msm8992-xiaomi-libra.dtb: pm8994-regulators: 'vdd_lvs1_2' does not match any of the regexes: '.*-supply$', '^((s|l|lvs|5vs)[0-9]*)|(boost-bypass)|(bob)$', 'pinctrl-[0-9]+' From schema: regulator/qcom,smd-rpm-regulator.yaml Reported-by: Rob Herring Cc: Konrad Dybcio Fixes: f3b2c99e73be ("arm64: dts: Enable onboard SDHCI on msm8992") Fixes: 0f5cdb31e850 ("arm64: dts: qcom: Add Xiaomi Libra (Mi 4C) device tree") Signed-off-by: Stephan Gerhold Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220627135938.2901871-1-stephan.gerhold@kernkonzept.com Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts | 2 +- arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts b/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts index 1ccca83292ac9e..c7d191dc6d4bad 100644 --- a/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts +++ b/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts @@ -74,7 +74,7 @@ vdd_l17_29-supply = <&vph_pwr>; vdd_l20_21-supply = <&vph_pwr>; vdd_l25-supply = <&pm8994_s5>; - vdd_lvs1_2 = <&pm8994_s4>; + vdd_lvs1_2-supply = <&pm8994_s4>; /* S1, S2, S6 and S12 are managed by RPMPD */ diff --git a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts index 357d55496e750f..a3d6340a0c55be 100644 --- a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts +++ b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts @@ -142,7 +142,7 @@ vdd_l17_29-supply = <&vph_pwr>; vdd_l20_21-supply = <&vph_pwr>; vdd_l25-supply = <&pm8994_s5>; - vdd_lvs1_2 = <&pm8994_s4>; + vdd_lvs1_2-supply = <&pm8994_s4>; /* S1, S2, S6 and S12 are managed by RPMPD */ From 9ec5fe55ba75e2a49392509c94fd8ef063ef1148 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 23 May 2022 12:24:19 +0300 Subject: [PATCH 0457/1056] ARM: at91: pm: use proper compatible for sama5d2's rtc [ Upstream commit ddc980da8043779119acaca106c6d9b445c9b65b ] Use proper compatible strings for SAMA5D2's RTC IPs. This is necessary for configuring wakeup sources for ULP1 PM mode. Fixes: d7484f5c6b3b ("ARM: at91: pm: configure wakeup sources for ULP1 mode") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220523092421.317345-2-claudiu.beznea@microchip.com Signed-off-by: Sasha Levin --- arch/arm/mach-at91/pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 8711d6824c1fa5..cde99c9d0b2ed7 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -146,7 +146,7 @@ static const struct wakeup_source_info ws_info[] = { static const struct of_device_id sama5d2_ws_ids[] = { { .compatible = "atmel,sama5d2-gem", .data = &ws_info[0] }, - { .compatible = "atmel,at91rm9200-rtc", .data = &ws_info[1] }, + { .compatible = "atmel,sama5d2-rtc", .data = &ws_info[1] }, { .compatible = "atmel,sama5d3-udc", .data = &ws_info[2] }, { .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] }, { .compatible = "usb-ohci", .data = &ws_info[2] }, From cfd0e717bd93b725d92c9ef9354fc55a643b688a Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 23 May 2022 12:24:20 +0300 Subject: [PATCH 0458/1056] ARM: at91: pm: use proper compatibles for sam9x60's rtc and rtt [ Upstream commit 641522665dbb25ce117c78746df1aad8b58c80e5 ] Use proper compatible strings for SAM9X60's RTC and RTT IPs. These are necessary for configuring wakeup sources for ULP1 PM mode. Fixes: eaedc0d379da ("ARM: at91: pm: add ULP1 support for SAM9X60") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220523092421.317345-3-claudiu.beznea@microchip.com Signed-off-by: Sasha Levin --- arch/arm/mach-at91/pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index cde99c9d0b2ed7..cbad3609b7831f 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -157,12 +157,12 @@ static const struct of_device_id sama5d2_ws_ids[] = { }; static const struct of_device_id sam9x60_ws_ids[] = { - { .compatible = "atmel,at91sam9x5-rtc", .data = &ws_info[1] }, + { .compatible = "microchip,sam9x60-rtc", .data = &ws_info[1] }, { .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] }, { .compatible = "usb-ohci", .data = &ws_info[2] }, { .compatible = "atmel,at91sam9g45-ehci", .data = &ws_info[2] }, { .compatible = "usb-ehci", .data = &ws_info[2] }, - { .compatible = "atmel,at91sam9260-rtt", .data = &ws_info[4] }, + { .compatible = "microchip,sam9x60-rtt", .data = &ws_info[4] }, { .compatible = "cdns,sam9x60-macb", .data = &ws_info[5] }, { /* sentinel */ } }; From a65b92628ae0f16e97e130825bafb8cd50b3d2e3 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 23 May 2022 12:24:21 +0300 Subject: [PATCH 0459/1056] ARM: at91: pm: use proper compatibles for sama7g5's rtc and rtt [ Upstream commit 1c40169b35ad58906814d53a517ac92db3d20d5f ] Use proper compatible strings for SAMA7G5's RTC and RTT IPs. These are necessary for configuring wakeup sources for ULP1 PM mode. Fixes: 6501330f9f5e ("ARM: at91: pm: add pm support for SAMA7G5") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220523092421.317345-4-claudiu.beznea@microchip.com Signed-off-by: Sasha Levin --- arch/arm/mach-at91/pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index cbad3609b7831f..ed1050404ef0a7 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -168,13 +168,13 @@ static const struct of_device_id sam9x60_ws_ids[] = { }; static const struct of_device_id sama7g5_ws_ids[] = { - { .compatible = "atmel,at91sam9x5-rtc", .data = &ws_info[1] }, + { .compatible = "microchip,sama7g5-rtc", .data = &ws_info[1] }, { .compatible = "microchip,sama7g5-ohci", .data = &ws_info[2] }, { .compatible = "usb-ohci", .data = &ws_info[2] }, { .compatible = "atmel,at91sam9g45-ehci", .data = &ws_info[2] }, { .compatible = "usb-ehci", .data = &ws_info[2] }, { .compatible = "microchip,sama7g5-sdhci", .data = &ws_info[3] }, - { .compatible = "atmel,at91sam9260-rtt", .data = &ws_info[4] }, + { .compatible = "microchip,sama7g5-rtt", .data = &ws_info[4] }, { /* sentinel */ } }; From f5b0e6d7b453193efbb26bb91e0827f3217f6bfb Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Tue, 7 Jun 2022 12:04:54 +0300 Subject: [PATCH 0460/1056] ARM: dts: at91: sam9x60ek: fix eeprom compatible and size [ Upstream commit f2cbbc3f926316ccf8ef9363d8a60c1110afc1c7 ] The board has a microchip 24aa025e48 eeprom, which is a 2 Kbits memory, so it's compatible with at24c02 not at24c32. Also the size property is wrong, it's not 128 bytes, but 256 bytes. Thus removing and leaving it to the default (256). Fixes: 1e5f532c27371 ("ARM: dts: at91: sam9x60: add device tree for soc and board") Signed-off-by: Eugen Hristev Reviewed-by: Claudiu Beznea Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220607090455.80433-1-eugen.hristev@microchip.com Signed-off-by: Sasha Levin --- arch/arm/boot/dts/at91-sam9x60ek.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/at91-sam9x60ek.dts b/arch/arm/boot/dts/at91-sam9x60ek.dts index b1068cca422872..fd8dc1183b3e88 100644 --- a/arch/arm/boot/dts/at91-sam9x60ek.dts +++ b/arch/arm/boot/dts/at91-sam9x60ek.dts @@ -233,10 +233,9 @@ status = "okay"; eeprom@53 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x53>; pagesize = <16>; - size = <128>; status = "okay"; }; }; From e3ee4ffa3c924ccd40a72a6374fe6dbb33a995ed Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Tue, 7 Jun 2022 12:04:55 +0300 Subject: [PATCH 0461/1056] ARM: dts: at91: sama5d2_icp: fix eeprom compatibles [ Upstream commit 416ce193d73a734ded6d09fe141017b38af1c567 ] The eeprom memories on the board are microchip 24aa025e48, which are 2 Kbits and are compatible with at24c02 not at24c32. Fixes: 68a95ef72cefe ("ARM: dts: at91: sama5d2-icp: add SAMA5D2-ICP") Signed-off-by: Eugen Hristev Reviewed-by: Claudiu Beznea Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220607090455.80433-2-eugen.hristev@microchip.com Signed-off-by: Sasha Levin --- arch/arm/boot/dts/at91-sama5d2_icp.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/at91-sama5d2_icp.dts b/arch/arm/boot/dts/at91-sama5d2_icp.dts index e06b58724ca83e..fd1a288f686bca 100644 --- a/arch/arm/boot/dts/at91-sama5d2_icp.dts +++ b/arch/arm/boot/dts/at91-sama5d2_icp.dts @@ -323,21 +323,21 @@ status = "okay"; eeprom@50 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x50>; pagesize = <16>; status = "okay"; }; eeprom@52 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x52>; pagesize = <16>; status = "disabled"; }; eeprom@53 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x53>; pagesize = <16>; status = "disabled"; From a2b92fffd51b9b7fddf77427f52a55d2f25a9692 Mon Sep 17 00:00:00 2001 From: Mihai Sain Date: Thu, 16 Jun 2022 11:13:44 +0300 Subject: [PATCH 0462/1056] ARM: at91: fix soc detection for SAM9X60 SiPs [ Upstream commit 35074df65a8d8c5328a83e2eea948f7bbc8e6e08 ] Fix SoC detection for SAM9X60 SiPs: SAM9X60D5M SAM9X60D1G SAM9X60D6K Fixes: af3a10513cd6 ("drivers: soc: atmel: add per soc id and version match masks") Signed-off-by: Mihai Sain Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220616081344.1978664-1-claudiu.beznea@microchip.com Signed-off-by: Sasha Levin --- drivers/soc/atmel/soc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/soc/atmel/soc.c b/drivers/soc/atmel/soc.c index a490ad7e090f26..9e3d370114474d 100644 --- a/drivers/soc/atmel/soc.c +++ b/drivers/soc/atmel/soc.c @@ -91,14 +91,14 @@ static const struct at91_soc socs[] __initconst = { AT91_SOC(SAM9X60_CIDR_MATCH, AT91_CIDR_MATCH_MASK, AT91_CIDR_VERSION_MASK, SAM9X60_EXID_MATCH, "sam9x60", "sam9x60"), - AT91_SOC(SAM9X60_CIDR_MATCH, SAM9X60_D5M_EXID_MATCH, - AT91_CIDR_VERSION_MASK, SAM9X60_EXID_MATCH, + AT91_SOC(SAM9X60_CIDR_MATCH, AT91_CIDR_MATCH_MASK, + AT91_CIDR_VERSION_MASK, SAM9X60_D5M_EXID_MATCH, "sam9x60 64MiB DDR2 SiP", "sam9x60"), - AT91_SOC(SAM9X60_CIDR_MATCH, SAM9X60_D1G_EXID_MATCH, - AT91_CIDR_VERSION_MASK, SAM9X60_EXID_MATCH, + AT91_SOC(SAM9X60_CIDR_MATCH, AT91_CIDR_MATCH_MASK, + AT91_CIDR_VERSION_MASK, SAM9X60_D1G_EXID_MATCH, "sam9x60 128MiB DDR2 SiP", "sam9x60"), - AT91_SOC(SAM9X60_CIDR_MATCH, SAM9X60_D6K_EXID_MATCH, - AT91_CIDR_VERSION_MASK, SAM9X60_EXID_MATCH, + AT91_SOC(SAM9X60_CIDR_MATCH, AT91_CIDR_MATCH_MASK, + AT91_CIDR_VERSION_MASK, SAM9X60_D6K_EXID_MATCH, "sam9x60 8MiB SDRAM SiP", "sam9x60"), #endif #ifdef CONFIG_SOC_SAMA5 From e7a1d51009217125e71b2287f0c57909bcca0914 Mon Sep 17 00:00:00 2001 From: Ivan Malov Date: Tue, 28 Jun 2022 12:18:48 +0300 Subject: [PATCH 0463/1056] xsk: Clear page contiguity bit when unmapping pool [ Upstream commit 512d1999b8e94a5d43fba3afc73e774849674742 ] When a XSK pool gets mapped, xp_check_dma_contiguity() adds bit 0x1 to pages' DMA addresses that go in ascending order and at 4K stride. The problem is that the bit does not get cleared before doing unmap. As a result, a lot of warnings from iommu_dma_unmap_page() are seen in dmesg, which indicates that lookups by iommu_iova_to_phys() fail. Fixes: 2b43470add8c ("xsk: Introduce AF_XDP buffer allocation API") Signed-off-by: Ivan Malov Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20220628091848.534803-1-ivan.malov@oktetlabs.ru Signed-off-by: Sasha Levin --- net/xdp/xsk_buff_pool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index fc7fbfc1e5861b..ccedbbd27692e6 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -326,6 +326,7 @@ static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs) for (i = 0; i < dma_map->dma_pages_cnt; i++) { dma = &dma_map->dma_pages[i]; if (*dma) { + *dma &= ~XSK_NEXT_PG_CONTIG_MASK; dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE, DMA_BIDIRECTIONAL, attrs); *dma = 0; From d2bf1a6480e8d44658a8ac3bdcec081238873212 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 22 Jun 2022 08:37:43 +0200 Subject: [PATCH 0464/1056] i2c: piix4: Fix a memory leak in the EFCH MMIO support [ Upstream commit 8ad59b397f86a4d8014966fdc0552095a0c4fb2b ] The recently added support for EFCH MMIO regions introduced a memory leak in that code path. The leak is caused by the fact that release_resource() merely removes the resource from the tree but does not free its memory. We need to call release_mem_region() instead, which does free the memory. As a nice side effect, this brings back some symmetry between the legacy and MMIO paths. Signed-off-by: Jean Delvare Reported-by: Yi Zhang Tested-by: Yi Zhang Reviewed-by: Terry Bowman Tested-by: Terry Bowman Fixes: 7c148722d074 ("i2c: piix4: Add EFCH MMIO support to region request and release") Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-piix4.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index ac8e7d60672a1b..39cb1b7bb8656c 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -161,7 +161,6 @@ static const char *piix4_aux_port_name_sb800 = " port 1"; struct sb800_mmio_cfg { void __iomem *addr; - struct resource *res; bool use_mmio; }; @@ -179,13 +178,11 @@ static int piix4_sb800_region_request(struct device *dev, struct sb800_mmio_cfg *mmio_cfg) { if (mmio_cfg->use_mmio) { - struct resource *res; void __iomem *addr; - res = request_mem_region_muxed(SB800_PIIX4_FCH_PM_ADDR, - SB800_PIIX4_FCH_PM_SIZE, - "sb800_piix4_smb"); - if (!res) { + if (!request_mem_region_muxed(SB800_PIIX4_FCH_PM_ADDR, + SB800_PIIX4_FCH_PM_SIZE, + "sb800_piix4_smb")) { dev_err(dev, "SMBus base address memory region 0x%x already in use.\n", SB800_PIIX4_FCH_PM_ADDR); @@ -195,12 +192,12 @@ static int piix4_sb800_region_request(struct device *dev, addr = ioremap(SB800_PIIX4_FCH_PM_ADDR, SB800_PIIX4_FCH_PM_SIZE); if (!addr) { - release_resource(res); + release_mem_region(SB800_PIIX4_FCH_PM_ADDR, + SB800_PIIX4_FCH_PM_SIZE); dev_err(dev, "SMBus base address mapping failed.\n"); return -ENOMEM; } - mmio_cfg->res = res; mmio_cfg->addr = addr; return 0; @@ -222,7 +219,8 @@ static void piix4_sb800_region_release(struct device *dev, { if (mmio_cfg->use_mmio) { iounmap(mmio_cfg->addr); - release_resource(mmio_cfg->res); + release_mem_region(SB800_PIIX4_FCH_PM_ADDR, + SB800_PIIX4_FCH_PM_SIZE); return; } From 9d1e322a910346f15e59b5d85d780dd74b6d17cb Mon Sep 17 00:00:00 2001 From: Lukasz Cieplicki Date: Tue, 31 May 2022 12:54:20 +0200 Subject: [PATCH 0465/1056] i40e: Fix dropped jumbo frames statistics [ Upstream commit 1adb1563e7b7ec659379a18e607e8bc3522d8a78 ] Dropped packets caused by too large frames were not included in dropped RX packets statistics. Issue was caused by not reading the GL_RXERR1 register. That register stores count of packet which was have been dropped due to too large size. Fix it by reading GL_RXERR1 register for each interface. Repro steps: Send a packet larger than the set MTU to SUT Observe rx statists: ethtool -S | grep rx | grep -v ": 0" Fixes: 41a9e55c89be ("i40e: add missing VSI statistics") Signed-off-by: Lukasz Cieplicki Signed-off-by: Jedrzej Jagielski Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e.h | 16 ++++ drivers/net/ethernet/intel/i40e/i40e_main.c | 73 +++++++++++++++++++ .../net/ethernet/intel/i40e/i40e_register.h | 13 ++++ drivers/net/ethernet/intel/i40e/i40e_type.h | 1 + 4 files changed, 103 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 56a3a6d1dbe417..210f09118edea6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -37,6 +37,7 @@ #include #include #include +#include #include "i40e_type.h" #include "i40e_prototype.h" #include @@ -1087,6 +1088,21 @@ static inline void i40e_write_fd_input_set(struct i40e_pf *pf, (u32)(val & 0xFFFFFFFFULL)); } +/** + * i40e_get_pf_count - get PCI PF count. + * @hw: pointer to a hw. + * + * Reports the function number of the highest PCI physical + * function plus 1 as it is loaded from the NVM. + * + * Return: PCI PF count. + **/ +static inline u32 i40e_get_pf_count(struct i40e_hw *hw) +{ + return FIELD_GET(I40E_GLGEN_PCIFCNCNT_PCIPFCNT_MASK, + rd32(hw, I40E_GLGEN_PCIFCNCNT)); +} + /* needed by i40e_ethtool.c */ int i40e_up(struct i40e_vsi *vsi); void i40e_down(struct i40e_vsi *vsi); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 9bc05d671ad5f4..02594e4d6258c6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -549,6 +549,47 @@ void i40e_pf_reset_stats(struct i40e_pf *pf) pf->hw_csum_rx_error = 0; } +/** + * i40e_compute_pci_to_hw_id - compute index form PCI function. + * @vsi: ptr to the VSI to read from. + * @hw: ptr to the hardware info. + **/ +static u32 i40e_compute_pci_to_hw_id(struct i40e_vsi *vsi, struct i40e_hw *hw) +{ + int pf_count = i40e_get_pf_count(hw); + + if (vsi->type == I40E_VSI_SRIOV) + return (hw->port * BIT(7)) / pf_count + vsi->vf_id; + + return hw->port + BIT(7); +} + +/** + * i40e_stat_update64 - read and update a 64 bit stat from the chip. + * @hw: ptr to the hardware info. + * @hireg: the high 32 bit reg to read. + * @loreg: the low 32 bit reg to read. + * @offset_loaded: has the initial offset been loaded yet. + * @offset: ptr to current offset value. + * @stat: ptr to the stat. + * + * Since the device stats are not reset at PFReset, they will not + * be zeroed when the driver starts. We'll save the first values read + * and use them as offsets to be subtracted from the raw values in order + * to report stats that count from zero. + **/ +static void i40e_stat_update64(struct i40e_hw *hw, u32 hireg, u32 loreg, + bool offset_loaded, u64 *offset, u64 *stat) +{ + u64 new_data; + + new_data = rd64(hw, loreg); + + if (!offset_loaded || new_data < *offset) + *offset = new_data; + *stat = new_data - *offset; +} + /** * i40e_stat_update48 - read and update a 48 bit stat from the chip * @hw: ptr to the hardware info @@ -620,6 +661,34 @@ static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat) *stat += new_data; } +/** + * i40e_stats_update_rx_discards - update rx_discards. + * @vsi: ptr to the VSI to be updated. + * @hw: ptr to the hardware info. + * @stat_idx: VSI's stat_counter_idx. + * @offset_loaded: ptr to the VSI's stat_offsets_loaded. + * @stat_offset: ptr to stat_offset to store first read of specific register. + * @stat: ptr to VSI's stat to be updated. + **/ +static void +i40e_stats_update_rx_discards(struct i40e_vsi *vsi, struct i40e_hw *hw, + int stat_idx, bool offset_loaded, + struct i40e_eth_stats *stat_offset, + struct i40e_eth_stats *stat) +{ + u64 rx_rdpc, rx_rxerr; + + i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx), offset_loaded, + &stat_offset->rx_discards, &rx_rdpc); + i40e_stat_update64(hw, + I40E_GL_RXERR1H(i40e_compute_pci_to_hw_id(vsi, hw)), + I40E_GL_RXERR1L(i40e_compute_pci_to_hw_id(vsi, hw)), + offset_loaded, &stat_offset->rx_discards_other, + &rx_rxerr); + + stat->rx_discards = rx_rdpc + rx_rxerr; +} + /** * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters. * @vsi: the VSI to be updated @@ -679,6 +748,10 @@ void i40e_update_eth_stats(struct i40e_vsi *vsi) I40E_GLV_BPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_broadcast, &es->tx_broadcast); + + i40e_stats_update_rx_discards(vsi, hw, stat_idx, + vsi->stat_offsets_loaded, oes, es); + vsi->stat_offsets_loaded = true; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 1908eed4fa5ee9..7339003aa17cd3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -211,6 +211,11 @@ #define I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT 0 #define I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT 16 #define I40E_GLGEN_MSRWD_MDIRDDATA_MASK I40E_MASK(0xFFFF, I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT) +#define I40E_GLGEN_PCIFCNCNT 0x001C0AB4 /* Reset: PCIR */ +#define I40E_GLGEN_PCIFCNCNT_PCIPFCNT_SHIFT 0 +#define I40E_GLGEN_PCIFCNCNT_PCIPFCNT_MASK I40E_MASK(0x1F, I40E_GLGEN_PCIFCNCNT_PCIPFCNT_SHIFT) +#define I40E_GLGEN_PCIFCNCNT_PCIVFCNT_SHIFT 16 +#define I40E_GLGEN_PCIFCNCNT_PCIVFCNT_MASK I40E_MASK(0xFF, I40E_GLGEN_PCIFCNCNT_PCIVFCNT_SHIFT) #define I40E_GLGEN_RSTAT 0x000B8188 /* Reset: POR */ #define I40E_GLGEN_RSTAT_DEVSTATE_SHIFT 0 #define I40E_GLGEN_RSTAT_DEVSTATE_MASK I40E_MASK(0x3, I40E_GLGEN_RSTAT_DEVSTATE_SHIFT) @@ -643,6 +648,14 @@ #define I40E_VFQF_HKEY1_MAX_INDEX 12 #define I40E_VFQF_HLUT1(_i, _VF) (0x00220000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...15, _VF=0...127 */ /* Reset: CORER */ #define I40E_VFQF_HLUT1_MAX_INDEX 15 +#define I40E_GL_RXERR1H(_i) (0x00318004 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */ +#define I40E_GL_RXERR1H_MAX_INDEX 143 +#define I40E_GL_RXERR1H_RXERR1H_SHIFT 0 +#define I40E_GL_RXERR1H_RXERR1H_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_RXERR1H_RXERR1H_SHIFT) +#define I40E_GL_RXERR1L(_i) (0x00318000 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */ +#define I40E_GL_RXERR1L_MAX_INDEX 143 +#define I40E_GL_RXERR1L_RXERR1L_SHIFT 0 +#define I40E_GL_RXERR1L_RXERR1L_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_RXERR1L_RXERR1L_SHIFT) #define I40E_GLPRT_BPRCH(_i) (0x003005E4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */ #define I40E_GLPRT_BPRCL(_i) (0x003005E0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */ #define I40E_GLPRT_BPTCH(_i) (0x00300A04 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 36a4ca1ffb1a93..7b3f30beb757ad 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -1172,6 +1172,7 @@ struct i40e_eth_stats { u64 tx_broadcast; /* bptc */ u64 tx_discards; /* tdpc */ u64 tx_errors; /* tepc */ + u64 rx_discards_other; /* rxerr1 */ }; /* Statistics collected per VEB per TC */ From ddec6cbbe22781d17965f1e6386e5a6363c058d2 Mon Sep 17 00:00:00 2001 From: Norbert Zulinski Date: Wed, 8 Jun 2022 11:10:56 +0200 Subject: [PATCH 0466/1056] i40e: Fix VF's MAC Address change on VM [ Upstream commit fed0d9f13266a22ce1fc9a97521ef9cdc6271a23 ] Clear VF MAC from parent PF and remove VF filter from VSI when both conditions are true: -VIRTCHNL_VF_OFFLOAD_USO is not used -VM MAC was not set from PF level It affects older version of IAVF and it allow them to change MAC Address on VM, newer IAVF won't change their behaviour. Previously it wasn't possible to change VF's MAC Address on VM because there is flag on IAVF driver that won't allow to change MAC Address if this address is given from PF driver. Fixes: 155f0ac2c96b ("iavf: allow permanent MAC address to change") Signed-off-by: Norbert Zulinski Signed-off-by: Jan Sokolowski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 6c1e668f4ebf27..d78ac5e7f658f6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2147,6 +2147,10 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) /* VFs only use TC 0 */ vfres->vsi_res[0].qset_handle = le16_to_cpu(vsi->info.qs_handle[0]); + if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_USO) && !vf->pf_set_mac) { + i40e_del_mac_filter(vsi, vf->default_lan_addr.addr); + eth_zero_addr(vf->default_lan_addr.addr); + } ether_addr_copy(vfres->vsi_res[0].default_mac_addr, vf->default_lan_addr.addr); } From d5670adf5cffe9075b906e122b7250d02cf9fd91 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Wed, 6 Oct 2021 11:53:55 +0200 Subject: [PATCH 0467/1056] ARM: dts: stm32: use usbphyc ck_usbo_48m as USBH OHCI clock on stm32mp151 [ Upstream commit db7be2cb87ae65e2d033a9f61f7fb94bce505177 ] Referring to the note under USBH reset and clocks chapter of RM0436, "In order to access USBH_OHCI registers it is necessary to activate the USB clocks by enabling the PLL controlled by USBPHYC" (ck_usbo_48m). The point is, when USBPHYC PLL is not enabled, OHCI register access freezes the resume from STANDBY. It is the case when dual USBH is enabled, instead of OTG + single USBH. When OTG is probed, as ck_usbo_48m is USBO clock parent, then USBPHYC PLL is enabled and OHCI register access is OK. This patch adds ck_usbo_48m (provided by USBPHYC PLL) as clock of USBH OHCI, thus USBPHYC PLL will be enabled and OHCI register access will be OK. Signed-off-by: Amelie Delaunay Signed-off-by: Alexandre Torgue Signed-off-by: Sasha Levin --- arch/arm/boot/dts/stm32mp151.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stm32mp151.dtsi b/arch/arm/boot/dts/stm32mp151.dtsi index 6992a4b0ba79bb..f693a7d242471b 100644 --- a/arch/arm/boot/dts/stm32mp151.dtsi +++ b/arch/arm/boot/dts/stm32mp151.dtsi @@ -1452,7 +1452,7 @@ usbh_ohci: usb@5800c000 { compatible = "generic-ohci"; reg = <0x5800c000 0x1000>; - clocks = <&rcc USBH>; + clocks = <&rcc USBH>, <&usbphyc>; resets = <&rcc USBH_R>; interrupts = ; status = "disabled"; From 5912e5e47ac99852dc4a8784c74b80a01c1765d9 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Tue, 21 Jun 2022 10:45:09 +0200 Subject: [PATCH 0468/1056] ARM: dts: stm32: add missing usbh clock and fix clk order on stm32mp15 [ Upstream commit 1d0c1aadf1fd9f3de95d1532b3651e8634546e71 ] The USBH composed of EHCI and OHCI controllers needs the PHY clock to be initialized first, before enabling (gating) them. The reverse is also required when going to suspend. So, add USBPHY clock as 1st entry in both controllers, so the USBPHY PLL gets enabled 1st upon controller init. Upon suspend/resume, this also makes the clock to be disabled/re-enabled in the correct order. This fixes some IRQ storm conditions seen when going to low-power, due to PHY PLL being disabled before all clocks are cleanly gated. Fixes: 949a0c0dec85 ("ARM: dts: stm32: add USB Host (USBH) support to stm32mp157c") Fixes: db7be2cb87ae ("ARM: dts: stm32: use usbphyc ck_usbo_48m as USBH OHCI clock on stm32mp151") Signed-off-by: Fabrice Gasnier Signed-off-by: Alexandre Torgue Signed-off-by: Sasha Levin --- arch/arm/boot/dts/stm32mp151.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp151.dtsi b/arch/arm/boot/dts/stm32mp151.dtsi index f693a7d242471b..a9b65b3bfda58a 100644 --- a/arch/arm/boot/dts/stm32mp151.dtsi +++ b/arch/arm/boot/dts/stm32mp151.dtsi @@ -1452,7 +1452,7 @@ usbh_ohci: usb@5800c000 { compatible = "generic-ohci"; reg = <0x5800c000 0x1000>; - clocks = <&rcc USBH>, <&usbphyc>; + clocks = <&usbphyc>, <&rcc USBH>; resets = <&rcc USBH_R>; interrupts = ; status = "disabled"; @@ -1461,7 +1461,7 @@ usbh_ehci: usb@5800d000 { compatible = "generic-ehci"; reg = <0x5800d000 0x1000>; - clocks = <&rcc USBH>; + clocks = <&usbphyc>, <&rcc USBH>; resets = <&rcc USBH_R>; interrupts = ; companion = <&usbh_ohci>; From 3fdca34e78110cafa99467c302b61e68b2f5dce5 Mon Sep 17 00:00:00 2001 From: Rick Lindsley Date: Sat, 2 Jul 2022 03:37:12 -0700 Subject: [PATCH 0469/1056] ibmvnic: Properly dispose of all skbs during a failover. [ Upstream commit 1b18f09d31cfa7148df15a7d5c5e0e86f105f7d1 ] During a reset, there may have been transmits in flight that are no longer valid and cannot be fulfilled. Resetting and clearing the queues is insufficient; each skb also needs to be explicitly freed so that upper levels are not left waiting for confirmation of a transmit that will never happen. If this happens frequently enough, the apparent backlog will cause TCP to begin "congestion control" unnecessarily, culminating in permanently decreased throughput. Fixes: d7c0ef36bde03 ("ibmvnic: Free and re-allocate scrqs when tx/rx scrqs change") Tested-by: Nick Child Reviewed-by: Brian King Signed-off-by: Rick Lindsley Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 28344c3dfea1b2..4a070724a8fb6a 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -5585,6 +5585,15 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) release_sub_crqs(adapter, 0); rc = init_sub_crqs(adapter); } else { + /* no need to reinitialize completely, but we do + * need to clean up transmits that were in flight + * when we processed the reset. Failure to do so + * will confound the upper layer, usually TCP, by + * creating the illusion of transmits that are + * awaiting completion. + */ + clean_tx_pools(adapter); + rc = reset_sub_crq_queues(adapter); } } else { From 8e5fcfecd99a1ce881aaeba031b1a7faccfefc7a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 3 Jul 2022 10:36:24 +0300 Subject: [PATCH 0470/1056] selftests: forwarding: fix flood_unicast_test when h2 supports IFF_UNICAST_FLT [ Upstream commit b8e629b05f5d23f9649c901bef09fab8b0c2e4b9 ] As mentioned in the blamed commit, flood_unicast_test() works by checking the match count on a tc filter placed on the receiving interface. But the second host interface (host2_if) has no interest in receiving a packet with MAC DA de:ad:be:ef:13:37, so its RX filter drops it even before the ingress tc filter gets to be executed. So we will incorrectly get the message "Packet was not flooded when should", when in fact, the packet was flooded as expected but dropped due to an unrelated reason, at some other layer on the receiving side. Force h2 to accept this packet by temporarily placing it in promiscuous mode. Alternatively we could either deliver to its MAC address or use tcpdump_start, but this has the fewest complications. This fixes the "flooding" test from bridge_vlan_aware.sh and bridge_vlan_unaware.sh, which calls flood_test from the lib. Fixes: 236dd50bf67a ("selftests: forwarding: Add a test for flooded traffic") Signed-off-by: Vladimir Oltean Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- tools/testing/selftests/net/forwarding/lib.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 92087d423bcf1d..8a9c55fe841ac6 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1215,6 +1215,7 @@ flood_test_do() # Add an ACL on `host2_if` which will tell us whether the packet # was flooded to it or not. + ip link set $host2_if promisc on tc qdisc add dev $host2_if ingress tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ flower dst_mac $mac action drop @@ -1232,6 +1233,7 @@ flood_test_do() tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower tc qdisc del dev $host2_if ingress + ip link set $host2_if promisc off return $err } From 1b74fe2e8f5ced78001cf5efd5d24cd304b4c538 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 3 Jul 2022 10:36:25 +0300 Subject: [PATCH 0471/1056] selftests: forwarding: fix learning_test when h1 supports IFF_UNICAST_FLT [ Upstream commit 1a635d3e1c80626237fdae47a5545b6655d8d81c ] The first host interface has by default no interest in receiving packets MAC DA de:ad:be:ef:13:37, so it might drop them before they hit the tc filter and this might confuse the selftest. Enable promiscuous mode such that the filter properly counts received packets. Fixes: d4deb01467ec ("selftests: forwarding: Add a test for FDB learning") Signed-off-by: Vladimir Oltean Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- tools/testing/selftests/net/forwarding/lib.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 8a9c55fe841ac6..0db6ea8d7e0511 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1149,6 +1149,7 @@ learning_test() # FDB entry was installed. bridge link set dev $br_port1 flood off + ip link set $host1_if promisc on tc qdisc add dev $host1_if ingress tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \ flower dst_mac $mac action drop @@ -1198,6 +1199,7 @@ learning_test() tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower tc qdisc del dev $host1_if ingress + ip link set $host1_if promisc off bridge link set dev $br_port1 flood on From 3abec0b381737580e3534988e0b647e780113897 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 3 Jul 2022 10:36:26 +0300 Subject: [PATCH 0472/1056] selftests: forwarding: fix error message in learning_test [ Upstream commit 83844aacab2015da1dba1df0cc61fc4b4c4e8076 ] When packets are not received, they aren't received on $host1_if, so the message talking about the second host not receiving them is incorrect. Fix it. Fixes: d4deb01467ec ("selftests: forwarding: Add a test for FDB learning") Signed-off-by: Vladimir Oltean Reviewed-by: Ido Schimmel Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- tools/testing/selftests/net/forwarding/lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 0db6ea8d7e0511..c9507df9c05bc6 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1160,7 +1160,7 @@ learning_test() tc -j -s filter show dev $host1_if ingress \ | jq -e ".[] | select(.options.handle == 101) \ | select(.options.actions[0].stats.packets == 1)" &> /dev/null - check_fail $? "Packet reached second host when should not" + check_fail $? "Packet reached first host when should not" $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q sleep 1 From 941d77b795d1a9c1a84a37df9c262ede8edb4ae0 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 5 Jul 2022 21:15:22 +0200 Subject: [PATCH 0473/1056] r8169: fix accessing unset transport header [ Upstream commit faa4e04e5e140a6d02260289a8fba8fd8d7a3003 ] 66e4c8d95008 ("net: warn if transport header was not set") added a check that triggers a warning in r8169, see [0]. The commit referenced in the Fixes tag refers to the change from which the patch applies cleanly, there's nothing wrong with this commit. It seems the actual issue (not bug, because the warning is harmless here) was introduced with bdfa4ed68187 ("r8169: use Giant Send"). [0] https://bugzilla.kernel.org/show_bug.cgi?id=216157 Fixes: 8d520b4de3ed ("r8169: work around RTL8125 UDP hw bug") Reported-by: Erhard F. Tested-by: Erhard F. Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/1b2c2b29-3dc0-f7b6-5694-97ec526d51a0@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_main.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 2918947dd57c9e..2af4c76bcf0274 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4177,7 +4177,6 @@ static void rtl8169_tso_csum_v1(struct sk_buff *skb, u32 *opts) static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, struct sk_buff *skb, u32 *opts) { - u32 transport_offset = (u32)skb_transport_offset(skb); struct skb_shared_info *shinfo = skb_shinfo(skb); u32 mss = shinfo->gso_size; @@ -4194,7 +4193,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, WARN_ON_ONCE(1); } - opts[0] |= transport_offset << GTTCPHO_SHIFT; + opts[0] |= skb_transport_offset(skb) << GTTCPHO_SHIFT; opts[1] |= mss << TD1_MSS_SHIFT; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { u8 ip_protocol; @@ -4222,7 +4221,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, else WARN_ON_ONCE(1); - opts[1] |= transport_offset << TCPHO_SHIFT; + opts[1] |= skb_transport_offset(skb) << TCPHO_SHIFT; } else { unsigned int padto = rtl_quirk_packet_padto(tp, skb); @@ -4389,14 +4388,13 @@ static netdev_features_t rtl8169_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { - int transport_offset = skb_transport_offset(skb); struct rtl8169_private *tp = netdev_priv(dev); if (skb_is_gso(skb)) { if (tp->mac_version == RTL_GIGA_MAC_VER_34) features = rtl8168evl_fix_tso(skb, features); - if (transport_offset > GTTCPHO_MAX && + if (skb_transport_offset(skb) > GTTCPHO_MAX && rtl_chip_supports_csum_v2(tp)) features &= ~NETIF_F_ALL_TSO; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -4407,7 +4405,7 @@ static netdev_features_t rtl8169_features_check(struct sk_buff *skb, if (rtl_quirk_packet_padto(tp, skb)) features &= ~NETIF_F_CSUM_MASK; - if (transport_offset > TCPHO_MAX && + if (skb_transport_offset(skb) > TCPHO_MAX && rtl_chip_supports_csum_v2(tp)) features &= ~NETIF_F_CSUM_MASK; } From 530ee8d3c6a473dfba62bd15487c3b05728bf097 Mon Sep 17 00:00:00 2001 From: Satish Nagireddy Date: Tue, 28 Jun 2022 12:12:16 -0700 Subject: [PATCH 0474/1056] i2c: cadence: Unregister the clk notifier in error path [ Upstream commit 3501f0c663063513ad604fb1b3f06af637d3396d ] This patch ensures that the clock notifier is unregistered when driver probe is returning error. Fixes: df8eb5691c48 ("i2c: Add driver for Cadence I2C controller") Signed-off-by: Satish Nagireddy Tested-by: Lars-Peter Clausen Reviewed-by: Michal Simek Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-cadence.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index b4c1ad19cdaec1..3d6f8ee355bfce 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -1338,6 +1338,7 @@ static int cdns_i2c_probe(struct platform_device *pdev) return 0; err_clk_dis: + clk_notifier_unregister(id->clk, &id->clk_rate_change_nb); clk_disable_unprepare(id->clk); pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); From d76704f8ccbb4a3905c9b4b094a8af785a2a747e Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Mon, 6 Jun 2022 17:10:34 +0100 Subject: [PATCH 0475/1056] dmaengine: imx-sdma: Allow imx8m for imx7 FW revs commit a7cd3cf0b2e5aaacfe5e02c472bd28e98e640be7 upstream. The revision of the imx-sdma IP that is in the i.MX8M series is the same is that as that in the i.MX7 series but the imx7d MODULE_FIRMWARE directive is wrapped in a condiditional which means it's not defined when built for aarch64 SOC_IMX8M platforms and hence you get the following errors when the driver loads on imx8m devices: imx-sdma 302c0000.dma-controller: Direct firmware load for imx/sdma/sdma-imx7d.bin failed with error -2 imx-sdma 302c0000.dma-controller: external firmware not found, using ROM firmware Add the SOC_IMX8M into the check so the firmware can load on i.MX8. Fixes: 1474d48bd639 ("arm64: dts: imx8mq: Add SDMA nodes") Fixes: 941acd566b18 ("dmaengine: imx-sdma: Only check ratio on parts that support 1:1") Signed-off-by: Peter Robinson Cc: stable@vger.kernel.org # v5.2+ Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/20220606161034.3544803-1-pbrobinson@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/imx-sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 2300d965a3f44c..5215a5e39f3c3c 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -2264,7 +2264,7 @@ MODULE_DESCRIPTION("i.MX SDMA driver"); #if IS_ENABLED(CONFIG_SOC_IMX6Q) MODULE_FIRMWARE("imx/sdma/sdma-imx6q.bin"); #endif -#if IS_ENABLED(CONFIG_SOC_IMX7D) +#if IS_ENABLED(CONFIG_SOC_IMX7D) || IS_ENABLED(CONFIG_SOC_IMX8M) MODULE_FIRMWARE("imx/sdma/sdma-imx7d.bin"); #endif MODULE_LICENSE("GPL"); From 378080b7d8dd39069c3c0a6d5ff5a0a9a00189b4 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 30 Jun 2022 20:32:55 -0600 Subject: [PATCH 0476/1056] misc: rtsx_usb: fix use of dma mapped buffer for usb bulk transfer commit eb7f8e28420372787933eec079735c35034bda7d upstream. rtsx_usb driver allocates coherent dma buffer for urb transfers. This buffer is passed to usb_bulk_msg() and usb core tries to map already mapped buffer running into a dma mapping error. xhci_hcd 0000:01:00.0: rejecting DMA map of vmalloc memory WARNING: CPU: 1 PID: 279 at include/linux/dma-mapping.h:326 usb_ hcd_map_urb_for_dma+0x7d6/0x820 ... xhci_map_urb_for_dma+0x291/0x4e0 usb_hcd_submit_urb+0x199/0x12b0 ... usb_submit_urb+0x3b8/0x9e0 usb_start_wait_urb+0xe3/0x2d0 usb_bulk_msg+0x115/0x240 rtsx_usb_transfer_data+0x185/0x1a8 [rtsx_usb] rtsx_usb_send_cmd+0xbb/0x123 [rtsx_usb] rtsx_usb_write_register+0x12c/0x143 [rtsx_usb] rtsx_usb_probe+0x226/0x4b2 [rtsx_usb] Fix it to use kmalloc() to get DMA-able memory region instead. Signed-off-by: Shuah Khan Cc: stable Link: https://lore.kernel.org/r/667d627d502e1ba9ff4f9b94966df3299d2d3c0d.1656642167.git.skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cardreader/rtsx_usb.c | 13 +++++++------ include/linux/rtsx_usb.h | 1 - 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c index 1ef9b61077c44e..e147cc8ab0fdfa 100644 --- a/drivers/misc/cardreader/rtsx_usb.c +++ b/drivers/misc/cardreader/rtsx_usb.c @@ -631,8 +631,7 @@ static int rtsx_usb_probe(struct usb_interface *intf, ucr->pusb_dev = usb_dev; - ucr->iobuf = usb_alloc_coherent(ucr->pusb_dev, IOBUF_SIZE, - GFP_KERNEL, &ucr->iobuf_dma); + ucr->iobuf = kmalloc(IOBUF_SIZE, GFP_KERNEL); if (!ucr->iobuf) return -ENOMEM; @@ -668,8 +667,9 @@ static int rtsx_usb_probe(struct usb_interface *intf, out_init_fail: usb_set_intfdata(ucr->pusb_intf, NULL); - usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, - ucr->iobuf_dma); + kfree(ucr->iobuf); + ucr->iobuf = NULL; + ucr->cmd_buf = ucr->rsp_buf = NULL; return ret; } @@ -682,8 +682,9 @@ static void rtsx_usb_disconnect(struct usb_interface *intf) mfd_remove_devices(&intf->dev); usb_set_intfdata(ucr->pusb_intf, NULL); - usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, - ucr->iobuf_dma); + kfree(ucr->iobuf); + ucr->iobuf = NULL; + ucr->cmd_buf = ucr->rsp_buf = NULL; } #ifdef CONFIG_PM diff --git a/include/linux/rtsx_usb.h b/include/linux/rtsx_usb.h index 159729cffd8e11..a07f7341ebc25a 100644 --- a/include/linux/rtsx_usb.h +++ b/include/linux/rtsx_usb.h @@ -55,7 +55,6 @@ struct rtsx_ucr { struct usb_interface *pusb_intf; struct usb_sg_request current_sg; unsigned char *iobuf; - dma_addr_t iobuf_dma; struct timer_list sg_timer; struct mutex dev_mutex; From bab1a05a11418d6b77723aa782bf7c7229889924 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 30 Jun 2022 20:32:56 -0600 Subject: [PATCH 0477/1056] misc: rtsx_usb: use separate command and response buffers commit 3776c78559853fd151be7c41e369fd076fb679d5 upstream. rtsx_usb uses same buffer for command and response. There could be a potential conflict using the same buffer for both especially if retries and timeouts are involved. Use separate command and response buffers to avoid conflicts. Signed-off-by: Shuah Khan Cc: stable Link: https://lore.kernel.org/r/07e3721804ff07aaab9ef5b39a5691d0718b9ade.1656642167.git.skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cardreader/rtsx_usb.c | 26 +++++++++++++++++--------- include/linux/rtsx_usb.h | 1 - 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c index e147cc8ab0fdfa..4e210805250949 100644 --- a/drivers/misc/cardreader/rtsx_usb.c +++ b/drivers/misc/cardreader/rtsx_usb.c @@ -631,15 +631,18 @@ static int rtsx_usb_probe(struct usb_interface *intf, ucr->pusb_dev = usb_dev; - ucr->iobuf = kmalloc(IOBUF_SIZE, GFP_KERNEL); - if (!ucr->iobuf) + ucr->cmd_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL); + if (!ucr->cmd_buf) return -ENOMEM; + ucr->rsp_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL); + if (!ucr->rsp_buf) + goto out_free_cmd_buf; + usb_set_intfdata(intf, ucr); ucr->vendor_id = id->idVendor; ucr->product_id = id->idProduct; - ucr->cmd_buf = ucr->rsp_buf = ucr->iobuf; mutex_init(&ucr->dev_mutex); @@ -667,9 +670,11 @@ static int rtsx_usb_probe(struct usb_interface *intf, out_init_fail: usb_set_intfdata(ucr->pusb_intf, NULL); - kfree(ucr->iobuf); - ucr->iobuf = NULL; - ucr->cmd_buf = ucr->rsp_buf = NULL; + kfree(ucr->rsp_buf); + ucr->rsp_buf = NULL; +out_free_cmd_buf: + kfree(ucr->cmd_buf); + ucr->cmd_buf = NULL; return ret; } @@ -682,9 +687,12 @@ static void rtsx_usb_disconnect(struct usb_interface *intf) mfd_remove_devices(&intf->dev); usb_set_intfdata(ucr->pusb_intf, NULL); - kfree(ucr->iobuf); - ucr->iobuf = NULL; - ucr->cmd_buf = ucr->rsp_buf = NULL; + + kfree(ucr->cmd_buf); + ucr->cmd_buf = NULL; + + kfree(ucr->rsp_buf); + ucr->rsp_buf = NULL; } #ifdef CONFIG_PM diff --git a/include/linux/rtsx_usb.h b/include/linux/rtsx_usb.h index a07f7341ebc25a..3247ed8e9ff0fb 100644 --- a/include/linux/rtsx_usb.h +++ b/include/linux/rtsx_usb.h @@ -54,7 +54,6 @@ struct rtsx_ucr { struct usb_device *pusb_dev; struct usb_interface *pusb_intf; struct usb_sg_request current_sg; - unsigned char *iobuf; struct timer_list sg_timer; struct mutex dev_mutex; From c1c98764c3c31e01800b7b0a1b15be7a04ebe3d8 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 1 Jul 2022 10:53:52 -0600 Subject: [PATCH 0478/1056] misc: rtsx_usb: set return value in rsp_buf alloc err path commit 2cd37c2e72449a7add6da1183d20a6247d6db111 upstream. Set return value in rsp_buf alloc error path before going to error handling. drivers/misc/cardreader/rtsx_usb.c:639:6: warning: variable 'ret' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] if (!ucr->rsp_buf) ^~~~~~~~~~~~~ drivers/misc/cardreader/rtsx_usb.c:678:9: note: uninitialized use occurs here return ret; ^~~ drivers/misc/cardreader/rtsx_usb.c:639:2: note: remove the 'if' if its condition is always false if (!ucr->rsp_buf) ^~~~~~~~~~~~~~~~~~ drivers/misc/cardreader/rtsx_usb.c:622:9: note: initialize the variable 'ret' to silence this warning int ret; ^ = 0 Fixes: 3776c7855985 ("misc: rtsx_usb: use separate command and response buffers") Reported-by: kernel test robot Cc: stable Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/20220701165352.15687-1-skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cardreader/rtsx_usb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c index 4e210805250949..f150d8769f1986 100644 --- a/drivers/misc/cardreader/rtsx_usb.c +++ b/drivers/misc/cardreader/rtsx_usb.c @@ -636,8 +636,10 @@ static int rtsx_usb_probe(struct usb_interface *intf, return -ENOMEM; ucr->rsp_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL); - if (!ucr->rsp_buf) + if (!ucr->rsp_buf) { + ret = -ENOMEM; goto out_free_cmd_buf; + } usb_set_intfdata(intf, ucr); From 2fa22e7906c1caf4731dc150a464547468b4b03a Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 28 Apr 2022 23:16:02 -0700 Subject: [PATCH 0479/1056] Revert "mm/memory-failure.c: fix race with changing page compound again" commit 2ba2b008a8bf5fd268a43d03ba79e0ad464d6836 upstream. Reverts commit 888af2701db7 ("mm/memory-failure.c: fix race with changing page compound again") because now we fetch the page refcount under hugetlb_lock in try_memory_failure_hugetlb() so that the race check is no longer necessary. Link: https://lkml.kernel.org/r/20220408135323.1559401-4-naoya.horiguchi@linux.dev Signed-off-by: Naoya Horiguchi Suggested-by: Miaohe Lin Reviewed-by: Miaohe Lin Reviewed-by: Mike Kravetz Cc: Miaohe Lin Cc: Yang Shi Cc: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/mm.h | 1 - include/ras/ras_event.h | 1 - mm/memory-failure.c | 11 ----------- 3 files changed, 13 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index c5fa46e9c0ca4e..e4e1817bb3b89b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3175,7 +3175,6 @@ enum mf_action_page_type { MF_MSG_BUDDY_2ND, MF_MSG_DAX, MF_MSG_UNSPLIT_THP, - MF_MSG_DIFFERENT_PAGE_SIZE, MF_MSG_UNKNOWN, }; diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index cac13ff1d6eb4e..0bdbc0d17d2fa2 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -376,7 +376,6 @@ TRACE_EVENT(aer_event, EM ( MF_MSG_BUDDY_2ND, "free buddy page (2nd try)" ) \ EM ( MF_MSG_DAX, "dax page" ) \ EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \ - EM ( MF_MSG_DIFFERENT_PAGE_SIZE, "different page size" ) \ EMe ( MF_MSG_UNKNOWN, "unknown page" ) /* diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ecd64b203272b1..c71135edd0a10b 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -741,7 +741,6 @@ static const char * const action_page_types[] = { [MF_MSG_BUDDY_2ND] = "free buddy page (2nd try)", [MF_MSG_DAX] = "dax page", [MF_MSG_UNSPLIT_THP] = "unsplit thp", - [MF_MSG_DIFFERENT_PAGE_SIZE] = "different page size", [MF_MSG_UNKNOWN] = "unknown page", }; @@ -1526,16 +1525,6 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb return res == MF_RECOVERED ? 0 : -EBUSY; } - /* - * The page could have changed compound pages due to race window. - * If this happens just bail out. - */ - if (!PageHuge(p) || compound_head(p) != head) { - action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED); - res = -EBUSY; - goto out; - } - page_flags = head->flags; /* From e99bad0d76cf23551f2b0e47d1e18ab6fc8bb9cb Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 10 May 2022 14:26:20 +0200 Subject: [PATCH 0480/1056] Revert "serial: 8250_mtk: Make sure to select the right FEATURE_SEL" commit f0136f65285bcfb7e8f90d1013723076a35acd51 upstream. It was found that some MediaTek SoCs are incompatible with this change. Also, this register was mistakenly understood as it was related to the 16550A register layout selection but, at least on some IPs, if not all, it's related to something else unknown. This reverts commit 6f81fdded0d024c7d4084d434764f30bca1cd6b1. Signed-off-by: AngeloGioacchino Del Regno Fixes: 6f81fdded0d0 ("serial: 8250_mtk: Make sure to select the right FEATURE_SEL") Reported-by: "kernelci.org bot" Link: https://lore.kernel.org/r/20220510122620.150342-1-angelogioacchino.delregno@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index de57f47635cdd1..de48a58460f47a 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -57,9 +57,6 @@ #define MTK_UART_XON1 40 /* I/O: Xon character 1 */ #define MTK_UART_XOFF1 42 /* I/O: Xoff character 1 */ -#define MTK_UART_FEATURE_SEL 39 /* Feature Selection register */ -#define MTK_UART_FEAT_NEWRMAP BIT(0) /* Use new register map */ - #ifdef CONFIG_SERIAL_8250_DMA enum dma_rx_status { DMA_RX_START = 0, @@ -575,10 +572,6 @@ static int mtk8250_probe(struct platform_device *pdev) uart.dma = data->dma; #endif - /* Set AP UART new register map */ - writel(MTK_UART_FEAT_NEWRMAP, uart.port.membase + - (MTK_UART_FEATURE_SEL << uart.port.regshift)); - /* Disable Rate Fix function */ writel(0x0, uart.port.membase + (MTK_UART_RATE_FIX << uart.port.regshift)); From 9839d89112d4cefb1a03592d710a58f02ce740f0 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 1 Jul 2022 22:19:02 -0500 Subject: [PATCH 0481/1056] dt-bindings: dma: allwinner,sun50i-a64-dma: Fix min/max typo commit 607a48c78e6b427b0b684d24e61c19e846ad65d6 upstream. The conditional block for variants with a second clock should have set minItems, not maxItems, which was already 2. Since clock-names requires two items, this typo should not have caused any problems. Fixes: edd14218bd66 ("dt-bindings: dmaengine: Convert Allwinner A31 and A64 DMA to a schema") Signed-off-by: Samuel Holland Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20220702031903.21703-1-samuel@sholland.org Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml index b6e1ebfaf36669..bb3cbc30d91218 100644 --- a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml +++ b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml @@ -64,7 +64,7 @@ if: then: properties: clocks: - maxItems: 2 + minItems: 2 required: - clock-names From 8b07022de2d3d1e3cb0a673e2c4b35df6a0099ca Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 Jul 2022 13:55:49 -0700 Subject: [PATCH 0482/1056] ida: don't use BUG_ON() for debugging commit fc82bbf4dede758007763867d0282353c06d1121 upstream. This is another old BUG_ON() that just shouldn't exist (see also commit a382f8fee42c: "signal handling: don't use BUG_ON() for debugging"). In fact, as Matthew Wilcox points out, this condition shouldn't really even result in a warning, since a negative id allocation result is just a normal allocation failure: "I wonder if we should even warn here -- sure, the caller is trying to free something that wasn't allocated, but we don't warn for kfree(NULL)" and goes on to point out how that current error check is only causing people to unnecessarily do their own index range checking before freeing it. This was noted by Itay Iellin, because the bluetooth HCI socket cookie code does *not* do that range checking, and ends up just freeing the error case too, triggering the BUG_ON(). The HCI code requires CAP_NET_RAW, and seems to just result in an ugly splat, but there really is no reason to BUG_ON() here, and we have generally striven for allocation models where it's always ok to just do free(alloc()); even if the allocation were to fail for some random reason (usually obviously that "random" reason being some resource limit). Fixes: 88eca0207cf1 ("ida: simplified functions for id allocation") Reported-by: Itay Iellin Suggested-by: Matthew Wilcox Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/idr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/idr.c b/lib/idr.c index f4ab4f4aa3c7f5..7ecdfdb5309e74 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -491,7 +491,8 @@ void ida_free(struct ida *ida, unsigned int id) struct ida_bitmap *bitmap; unsigned long flags; - BUG_ON((int)id < 0); + if ((int)id < 0) + return; xas_lock_irqsave(&xas, flags); bitmap = xas_load(&xas); From 0bbb30d077f241be01591fd15e0814469eba41ef Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 20 May 2022 21:14:32 +0300 Subject: [PATCH 0483/1056] dmaengine: pl330: Fix lockdep warning about non-static key commit b64b3b2f1d81f83519582e1feee87d77f51f5f17 upstream. The DEFINE_SPINLOCK() macro shouldn't be used for dynamically allocated spinlocks. The lockdep warns about this and disables locking validator. Fix the warning by making lock static. INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. Hardware name: Radxa ROCK Pi 4C (DT) Call trace: dump_backtrace.part.0+0xcc/0xe0 show_stack+0x18/0x6c dump_stack_lvl+0x8c/0xb8 dump_stack+0x18/0x34 register_lock_class+0x4a8/0x4cc __lock_acquire+0x78/0x20cc lock_acquire.part.0+0xe0/0x230 lock_acquire+0x68/0x84 _raw_spin_lock_irqsave+0x84/0xc4 add_desc+0x44/0xc0 pl330_get_desc+0x15c/0x1d0 pl330_prep_dma_cyclic+0x100/0x270 snd_dmaengine_pcm_trigger+0xec/0x1c0 dmaengine_pcm_trigger+0x18/0x24 ... Fixes: e588710311ee ("dmaengine: pl330: fix descriptor allocation fail") Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20220520181432.149904-1-dmitry.osipenko@collabora.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/pl330.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 110de8a6005884..4ef68ddff75bc9 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2589,7 +2589,7 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch) /* If the DMAC pool is empty, alloc new */ if (!desc) { - DEFINE_SPINLOCK(lock); + static DEFINE_SPINLOCK(lock); LIST_HEAD(pool); if (!add_desc(&pool, &lock, GFP_ATOMIC, 1)) From c787908bee3fd1300be61822dd4ccc96e9bc397b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 22 May 2022 19:41:05 +0200 Subject: [PATCH 0484/1056] dmaengine: lgm: Fix an error handling path in intel_ldma_probe() commit 1dbe67b9faea0bc340cce894018076679c16cb71 upstream. ldma_clk_disable() calls both: clk_disable_unprepare(d->core_clk); reset_control_assert(d->rst); So, should devm_reset_control_get_optional() fail, core_clk should not be prepare_enable'd before it, otherwise it will never be disable_unprepare'd. Reorder the code to handle the error handling path as expected. Fixes: 32d31c79a1a4 ("dmaengine: Add Intel LGM SoC DMA support.") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/18504549bc4d2b62a72a02cb22a2e4d8e6a58720.1653241224.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/lgm/lgm-dma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/lgm/lgm-dma.c b/drivers/dma/lgm/lgm-dma.c index efe8bd3a0e2aa5..9b9184f964be39 100644 --- a/drivers/dma/lgm/lgm-dma.c +++ b/drivers/dma/lgm/lgm-dma.c @@ -1593,11 +1593,12 @@ static int intel_ldma_probe(struct platform_device *pdev) d->core_clk = devm_clk_get_optional(dev, NULL); if (IS_ERR(d->core_clk)) return PTR_ERR(d->core_clk); - clk_prepare_enable(d->core_clk); d->rst = devm_reset_control_get_optional(dev, NULL); if (IS_ERR(d->rst)) return PTR_ERR(d->rst); + + clk_prepare_enable(d->core_clk); reset_control_deassert(d->rst); ret = devm_add_action_or_reset(dev, ldma_clk_disable, d); From e08ccbaa5fb3f2abed9290380f86332d64720b60 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 26 May 2022 15:51:11 +0200 Subject: [PATCH 0485/1056] dmaengine: at_xdma: handle errors of at_xdmac_alloc_desc() correctly commit 3770d92bd5237d686e49da7b2fb86f53ee6ed259 upstream. It seems that it is valid to have less than the requested number of descriptors. But what is not valid and leads to subsequent errors is to have zero descriptors. In that case, abort the probing. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20220526135111.1470926-1-michael@walle.cc Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_xdmac.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 177a537971a1b6..c5638afe943684 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1898,6 +1898,11 @@ static int at_xdmac_alloc_chan_resources(struct dma_chan *chan) for (i = 0; i < init_nr_desc_per_channel; i++) { desc = at_xdmac_alloc_desc(chan, GFP_KERNEL); if (!desc) { + if (i == 0) { + dev_warn(chan2dev(chan), + "can't allocate any descriptors\n"); + return -EIO; + } dev_warn(chan2dev(chan), "only %d descriptors have been allocated\n", i); break; From cb9813d7eae917acd34436160a278b8b5d48ca53 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sun, 5 Jun 2022 08:27:23 +0400 Subject: [PATCH 0486/1056] dmaengine: ti: Fix refcount leak in ti_dra7_xbar_route_allocate commit c132fe78ad7b4ce8b5d49a501a15c29d08eeb23a upstream. of_parse_phandle() returns a node pointer with refcount incremented, we should use of_node_put() on it when not needed anymore. Add missing of_node_put() in to fix this. Fixes: ec9bfa1e1a79 ("dmaengine: ti-dma-crossbar: dra7: Use bitops instead of idr") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220605042723.17668-2-linmq006@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ti/dma-crossbar.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/ti/dma-crossbar.c b/drivers/dma/ti/dma-crossbar.c index 71d24fc07c0038..e34cfb50d24166 100644 --- a/drivers/dma/ti/dma-crossbar.c +++ b/drivers/dma/ti/dma-crossbar.c @@ -268,6 +268,7 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, mutex_unlock(&xbar->mutex); dev_err(&pdev->dev, "Run out of free DMA requests\n"); kfree(map); + of_node_put(dma_spec->np); return ERR_PTR(-ENOMEM); } set_bit(map->xbar_out, xbar->dma_inuse); From 2f6ded79068cac8cff41d5d5632564165d98ee12 Mon Sep 17 00:00:00 2001 From: Caleb Connolly Date: Wed, 29 Jun 2022 15:06:00 +0100 Subject: [PATCH 0487/1056] dmaengine: qcom: bam_dma: fix runtime PM underflow commit 0ac9c3dd0d6fe293cd5044cfad10bec27d171e4e upstream. Commit dbad41e7bb5f ("dmaengine: qcom: bam_dma: check if the runtime pm enabled") caused unbalanced pm_runtime_get/put() calls when the bam is controlled remotely. This commit reverts it and just enables pm_runtime in all cases, the clk_* functions already just nop when the clock is NULL. Also clean up a bit by removing unnecessary bamclk null checks. Suggested-by: Stephan Gerhold Fixes: dbad41e7bb5f ("dmaengine: qcom: bam_dma: check if the runtime pm enabled") Signed-off-by: Caleb Connolly Link: https://lore.kernel.org/r/20220629140559.118537-1-caleb.connolly@linaro.org Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/qcom/bam_dma.c | 39 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index c8a77b428b528b..ca8c862c9747e4 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -515,14 +515,6 @@ static int bam_alloc_chan(struct dma_chan *chan) return 0; } -static int bam_pm_runtime_get_sync(struct device *dev) -{ - if (pm_runtime_enabled(dev)) - return pm_runtime_get_sync(dev); - - return 0; -} - /** * bam_free_chan - Frees dma resources associated with specific channel * @chan: specified channel @@ -538,7 +530,7 @@ static void bam_free_chan(struct dma_chan *chan) unsigned long flags; int ret; - ret = bam_pm_runtime_get_sync(bdev->dev); + ret = pm_runtime_get_sync(bdev->dev); if (ret < 0) return; @@ -734,7 +726,7 @@ static int bam_pause(struct dma_chan *chan) unsigned long flag; int ret; - ret = bam_pm_runtime_get_sync(bdev->dev); + ret = pm_runtime_get_sync(bdev->dev); if (ret < 0) return ret; @@ -760,7 +752,7 @@ static int bam_resume(struct dma_chan *chan) unsigned long flag; int ret; - ret = bam_pm_runtime_get_sync(bdev->dev); + ret = pm_runtime_get_sync(bdev->dev); if (ret < 0) return ret; @@ -869,7 +861,7 @@ static irqreturn_t bam_dma_irq(int irq, void *data) if (srcs & P_IRQ) tasklet_schedule(&bdev->task); - ret = bam_pm_runtime_get_sync(bdev->dev); + ret = pm_runtime_get_sync(bdev->dev); if (ret < 0) return IRQ_NONE; @@ -987,7 +979,7 @@ static void bam_start_dma(struct bam_chan *bchan) if (!vd) return; - ret = bam_pm_runtime_get_sync(bdev->dev); + ret = pm_runtime_get_sync(bdev->dev); if (ret < 0) return; @@ -1350,11 +1342,6 @@ static int bam_dma_probe(struct platform_device *pdev) if (ret) goto err_unregister_dma; - if (!bdev->bamclk) { - pm_runtime_disable(&pdev->dev); - return 0; - } - pm_runtime_irq_safe(&pdev->dev); pm_runtime_set_autosuspend_delay(&pdev->dev, BAM_DMA_AUTOSUSPEND_DELAY); pm_runtime_use_autosuspend(&pdev->dev); @@ -1438,10 +1425,8 @@ static int __maybe_unused bam_dma_suspend(struct device *dev) { struct bam_device *bdev = dev_get_drvdata(dev); - if (bdev->bamclk) { - pm_runtime_force_suspend(dev); - clk_unprepare(bdev->bamclk); - } + pm_runtime_force_suspend(dev); + clk_unprepare(bdev->bamclk); return 0; } @@ -1451,13 +1436,11 @@ static int __maybe_unused bam_dma_resume(struct device *dev) struct bam_device *bdev = dev_get_drvdata(dev); int ret; - if (bdev->bamclk) { - ret = clk_prepare(bdev->bamclk); - if (ret) - return ret; + ret = clk_prepare(bdev->bamclk); + if (ret) + return ret; - pm_runtime_force_resume(dev); - } + pm_runtime_force_resume(dev); return 0; } From 568b2bd79b59f3c376bec40d7e93fb22c9d8a65f Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sun, 5 Jun 2022 08:27:22 +0400 Subject: [PATCH 0488/1056] dmaengine: ti: Add missing put_device in ti_dra7_xbar_route_allocate commit 615a4bfc426e11dba05c2cf343f9ac752fb381d2 upstream. of_find_device_by_node() takes reference, we should use put_device() to release it when not need anymore. Fixes: a074ae38f859 ("dmaengine: Add driver for TI DMA crossbar on DRA7x") Signed-off-by: Miaoqian Lin Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20220605042723.17668-1-linmq006@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ti/dma-crossbar.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/ti/dma-crossbar.c b/drivers/dma/ti/dma-crossbar.c index e34cfb50d24166..f744ddbbbad7fa 100644 --- a/drivers/dma/ti/dma-crossbar.c +++ b/drivers/dma/ti/dma-crossbar.c @@ -245,6 +245,7 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, if (dma_spec->args[0] >= xbar->xbar_requests) { dev_err(&pdev->dev, "Invalid XBAR request number: %d\n", dma_spec->args[0]); + put_device(&pdev->dev); return ERR_PTR(-EINVAL); } @@ -252,12 +253,14 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); if (!dma_spec->np) { dev_err(&pdev->dev, "Can't get DMA master\n"); + put_device(&pdev->dev); return ERR_PTR(-EINVAL); } map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) { of_node_put(dma_spec->np); + put_device(&pdev->dev); return ERR_PTR(-ENOMEM); } @@ -269,6 +272,7 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, dev_err(&pdev->dev, "Run out of free DMA requests\n"); kfree(map); of_node_put(dma_spec->np); + put_device(&pdev->dev); return ERR_PTR(-ENOMEM); } set_bit(map->xbar_out, xbar->dma_inuse); From a5fe76328ea53d38c4f86aef52eb3af5bac9ab63 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 28 Jun 2022 16:00:56 -0700 Subject: [PATCH 0489/1056] dmaengine: idxd: force wq context cleanup on device disable path commit 44c4237cf3436bda2b185ff728123651ad133f69 upstream. Testing shown that when a wq mode is setup to be dedicated and then torn down and reconfigured to shared, the wq configured end up being dedicated anyays. The root cause is when idxd_device_wqs_clear_state() gets called during idxd_driver removal, idxd_wq_disable_cleanup() does not get called vs when the wq driver is removed first. The check of wq state being "enabled" causes the cleanup to be bypassed. However, idxd_driver->remove() releases all wq drivers. So the wqs goes to "disabled" state and will never be "enabled". By that point, the driver has no idea if the wq was previously configured or clean. So force call idxd_wq_disable_cleanup() on all wqs always to make sure everything gets cleaned up. Reported-by: Tony Zhu Tested-by: Tony Zhu Fixes: 0dcfe41e9a4c ("dmanegine: idxd: cleanup all device related bits after disabling device") Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20220628230056.2527816-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/idxd/device.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index e622245c93804e..11d3f2aede711a 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -720,10 +720,7 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd) for (i = 0; i < idxd->max_wqs; i++) { struct idxd_wq *wq = idxd->wqs[i]; - if (wq->state == IDXD_WQ_ENABLED) { - idxd_wq_disable_cleanup(wq); - wq->state = IDXD_WQ_DISABLED; - } + idxd_wq_disable_cleanup(wq); idxd_wq_device_reset_cleanup(wq); } } From c0c041a60cac9ce2cdfa48fc45d525633428760e Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 30 Jun 2022 14:22:28 +0800 Subject: [PATCH 0490/1056] selftests/net: fix section name when using xdp_dummy.o commit d28b25a62a47a8c8aa19bd543863aab6717e68c9 upstream. Since commit 8fffa0e3451a ("selftests/bpf: Normalize XDP section names in selftests") the xdp_dummy.o's section name has changed to xdp. But some tests are still using "section xdp_dummy", which make the tests failed. Fix them by updating to the new section name. Fixes: 8fffa0e3451a ("selftests/bpf: Normalize XDP section names in selftests") Signed-off-by: Hangbin Liu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220630062228.3453016-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/udpgro.sh | 2 +- tools/testing/selftests/net/udpgro_bench.sh | 2 +- tools/testing/selftests/net/udpgro_fwd.sh | 2 +- tools/testing/selftests/net/veth.sh | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index f8a19f548ae9d5..ebbd0b28243272 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -34,7 +34,7 @@ cfg_veth() { ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy + ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp } run_one() { diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index 820bc50f6b6871..fad2d1a71cac36 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -34,7 +34,7 @@ run_one() { ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy + ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r & diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 6f05e06f67613d..1bcd82e1f662ee 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -46,7 +46,7 @@ create_ns() { ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad done - ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null + ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null } create_vxlan_endpoint() { diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 19eac3e44c0652..430895d1a2b63e 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -289,14 +289,14 @@ if [ $CPUS -gt 1 ]; then ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null printf "%-60s" "bad setting: XDP with RX nr less than TX" ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ - section xdp_dummy 2>/dev/null &&\ + section xdp 2>/dev/null &&\ echo "fail - set operation successful ?!?" || echo " ok " # the following tests will run with multiple channels active ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 ip netns exec $NS_DST ethtool -L veth$DST rx 2 ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ - section xdp_dummy 2>/dev/null + section xdp 2>/dev/null printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set" ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\ echo "fail - set operation successful ?!?" || echo " ok " @@ -311,7 +311,7 @@ if [ $CPUS -gt 2 ]; then chk_channels "setting invalid channels nr" $DST 2 2 fi -ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null +ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null chk_gro_flag "with xdp attached - gro flag" $DST on chk_gro_flag " - peer gro flag" $SRC off chk_tso_flag " - tso flag" $SRC off From 843dae1756d9bddee21a96827784791fd97d484e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 12 Jul 2022 16:35:19 +0200 Subject: [PATCH 0491/1056] Linux 5.15.54 Link: https://lore.kernel.org/r/20220711090604.055883544@linuxfoundation.org Link: https://lore.kernel.org/r/20220711145306.494277196@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Shuah Khan Tested-by: Ron Economos Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20220712071513.420542604@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Bagas Sanjaya Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 397fb08d17f2b7..f8dc156aa30215 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 53 +SUBLEVEL = 54 EXTRAVERSION = NAME = Trick or Treat From c80b15105a08dceffcbb0381f85696b46bce0d1b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 15 Jul 2022 09:21:15 +0200 Subject: [PATCH 0492/1056] Revert "mtd: rawnand: gpmi: Fix setting busy timeout setting" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 0af674e7a764563496480c1e30fadf0048325978 which is commit 06781a5026350cde699d2d10c9914a25c1524f45 upstream. It is reported to cause data loss, so revert it to prevent that from happening for users of this driver. Reported-by: Tomasz Moń Reported-by: Sascha Hauer Cc: Miquel Raynal Link: https://lore.kernel.org/all/20220701110341.3094023-1-s.hauer@pengutronix.de/ Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index 6eb0677777037f..b72b387c08ef78 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -685,7 +685,7 @@ static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this, hw->timing0 = BF_GPMI_TIMING0_ADDRESS_SETUP(addr_setup_cycles) | BF_GPMI_TIMING0_DATA_HOLD(data_hold_cycles) | BF_GPMI_TIMING0_DATA_SETUP(data_setup_cycles); - hw->timing1 = BF_GPMI_TIMING1_BUSY_TIMEOUT(DIV_ROUND_UP(busy_timeout_cycles, 4096)); + hw->timing1 = BF_GPMI_TIMING1_BUSY_TIMEOUT(busy_timeout_cycles * 4096); /* * Derive NFC ideal delay from {3}: From baefa2315cb1371486f6661a628e96fa3336f573 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 15 Jul 2022 10:13:00 +0200 Subject: [PATCH 0493/1056] Linux 5.15.55 Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f8dc156aa30215..c95f0f59885fdf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 54 +SUBLEVEL = 55 EXTRAVERSION = NAME = Trick or Treat From d60bb64d326332f20da31887ce3939b5dd48c0a1 Mon Sep 17 00:00:00 2001 From: Meng Tang Date: Tue, 12 Jul 2022 14:00:05 +0800 Subject: [PATCH 0494/1056] ALSA: hda - Add fixup for Dell Latitidue E5430 commit 841bdf85c226803a78a9319af9b2caa9bf3e2eda upstream. Another Dell model, another fixup entry: Latitude E5430 needs the same fixup as other Latitude E series as workaround for noise problems. Signed-off-by: Meng Tang Cc: Link: https://lore.kernel.org/r/20220712060005.20176-1-tangmeng@uniontech.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3c3eac2399da7d..e0603b97b69749 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8704,6 +8704,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), + SND_PCI_QUIRK(0x1028, 0x053c, "Dell Latitude E5430", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS), SND_PCI_QUIRK(0x1028, 0x05bd, "Dell Latitude E6440", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x05be, "Dell Latitude E6540", ALC292_FIXUP_DELL_E7X), From 6744faa1d810fc8823da2bb0462c0abc5a14c08e Mon Sep 17 00:00:00 2001 From: Meng Tang Date: Mon, 11 Jul 2022 18:17:44 +0800 Subject: [PATCH 0495/1056] ALSA: hda/conexant: Apply quirk for another HP ProDesk 600 G3 model commit d16d69bf5a25d91c6d8f3e29711be12551bf56cd upstream. There is another HP ProDesk 600 G3 model with the PCI SSID 103c:82b4 that requires the quirk HP_MIC_NO_PRESENCE. Add the corresponding entry to the quirk table. Signed-off-by: Meng Tang Cc: Link: https://lore.kernel.org/r/20220711101744.25189-1-tangmeng@uniontech.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 0b7d500249f6e6..8c68e6e1387ef7 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -944,6 +944,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x103c, 0x82b4, "HP ProDesk 600 G3", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x836e, "HP ProBook 455 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x837f, "HP ProBook 470 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x83b2, "HP EliteBook 840 G5", CXT_FIXUP_HP_DOCK), From 32fad77c4cd2e3568ad40472e7c6069b7a37ce57 Mon Sep 17 00:00:00 2001 From: Meng Tang Date: Mon, 11 Jul 2022 16:15:27 +0800 Subject: [PATCH 0496/1056] ALSA: hda/realtek: Fix headset mic for Acer SF313-51 commit 5f3fe25e70559fa3b096ab17e13316c93ddb7020 upstream. The issue on Acer SWIFT SF313-51 is that headset microphone doesn't work. The following quirk fixed headset microphone issue. Note that the fixup of SF314-54/55 (ALC256_FIXUP_ACER_HEADSET_MIC) was not successful on my SF313-51. Signed-off-by: Meng Tang Cc: Link: https://lore.kernel.org/r/20220711081527.6254-1-tangmeng@uniontech.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e0603b97b69749..330108b3c453be 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8695,6 +8695,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x129d, "Acer SWIFT SF313-51", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1300, "Acer SWIFT SF314-56", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), From 7d0c5005c5806cb0747a634c2be68ef71561d6d2 Mon Sep 17 00:00:00 2001 From: Meng Tang Date: Tue, 12 Jul 2022 17:22:22 +0800 Subject: [PATCH 0497/1056] ALSA: hda/realtek - Fix headset mic problem for a HP machine with alc671 commit dbe75d314748e08fc6e4576d153d8a69621ee5ca upstream. On a HP 288 Pro G6, the front mic could not be detected.In order to get it working, the pin configuration needs to be set correctly, and the ALC671_FIXUP_HP_HEADSET_MIC2 fixup needs to be applied. Signed-off-by: Meng Tang Cc: Link: https://lore.kernel.org/r/20220712092222.21738-1-tangmeng@uniontech.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 330108b3c453be..f13384d25a1990 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -11003,6 +11003,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), SND_PCI_QUIRK(0x103c, 0x8719, "HP", ALC897_FIXUP_HP_HSMIC_VERB), SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2), + SND_PCI_QUIRK(0x103c, 0x877e, "HP 288 Pro G6", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x103c, 0x885f, "HP 288 Pro G8", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50), From dd9746cf6da3988c06da1e9df83358bc7d5dd936 Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Wed, 13 Jul 2022 10:27:04 +0800 Subject: [PATCH 0498/1056] ALSA: hda/realtek: fix mute/micmute LEDs for HP machines commit 61d307855eb1a2ae849da445edd5389db8a58a5c upstream. The HP ProBook 440/450 G9 and EliteBook 640/650 G9 have multiple motherboard design and they are using different subsystem ID of audio codec. Add the same quirk for other MBs. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20220713022706.22892-1-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f13384d25a1990..cc273fe5a580aa 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8887,6 +8887,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x89c3, "HP", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x103c, 0x8aa0, "HP ProBook 440 G9 (MB 8A9E)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From cd2731b3efe80b14c6a88eb3bb895136f9a0d62c Mon Sep 17 00:00:00 2001 From: Meng Tang Date: Wed, 13 Jul 2022 14:33:32 +0800 Subject: [PATCH 0499/1056] ALSA: hda/realtek - Fix headset mic problem for a HP machine with alc221 commit 4ba5c853d7945b3855c3dcb293f7f9f019db641e upstream. On a HP 288 Pro G2 MT (X9W02AV), the front mic could not be detected. In order to get it working, the pin configuration needs to be set correctly, and the ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE fixup needs to be applied. Signed-off-by: Meng Tang Cc: Link: https://lore.kernel.org/r/20220713063332.30095-1-tangmeng@uniontech.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index cc273fe5a580aa..d8a49e065fe3c0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6780,6 +6780,7 @@ enum { ALC298_FIXUP_LENOVO_SPK_VOLUME, ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER, ALC269_FIXUP_ATIV_BOOK_8, + ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE, ALC221_FIXUP_HP_MIC_NO_PRESENCE, ALC256_FIXUP_ASUS_HEADSET_MODE, ALC256_FIXUP_ASUS_MIC, @@ -7707,6 +7708,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_NO_SHUTUP }, + [ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { 0x1a, 0x01813030 }, /* use as headphone mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, [ALC221_FIXUP_HP_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -8820,6 +8831,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2b5e, "HP 288 Pro G2 MT", ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x802e, "HP Z240 SFF", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x802f, "HP Z240", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8077, "HP", ALC256_FIXUP_HP_HEADSET_MIC), From 9d3243d774f5bc374f016c689bd6cefd8c4e7941 Mon Sep 17 00:00:00 2001 From: Meng Tang Date: Wed, 13 Jul 2022 17:41:33 +0800 Subject: [PATCH 0500/1056] ALSA: hda/realtek - Enable the headset-mic on a Xiaomi's laptop commit 9b043a8f386485c74c0f8eea2c287d5bdbdf3279 upstream. The headset on this machine is not defined, after applying the quirk ALC256_FIXUP_ASUS_HEADSET_MIC, the headset-mic works well Signed-off-by: Meng Tang Cc: Link: https://lore.kernel.org/r/20220713094133.9894-1-tangmeng@uniontech.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d8a49e065fe3c0..f3fa35b302ce92 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9158,6 +9158,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), From b99174ac57fe5d8867448c03b23828e63f24cb1c Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 13 Jul 2022 15:53:22 +0200 Subject: [PATCH 0501/1056] xen/netback: avoid entering xenvif_rx_next_skb() with an empty rx queue commit 94e8100678889ab428e68acadf042de723f094b9 upstream. xenvif_rx_next_skb() is expecting the rx queue not being empty, but in case the loop in xenvif_rx_action() is doing multiple iterations, the availability of another skb in the rx queue is not being checked. This can lead to crashes: [40072.537261] BUG: unable to handle kernel NULL pointer dereference at 0000000000000080 [40072.537407] IP: xenvif_rx_skb+0x23/0x590 [xen_netback] [40072.537534] PGD 0 P4D 0 [40072.537644] Oops: 0000 [#1] SMP NOPTI [40072.537749] CPU: 0 PID: 12505 Comm: v1-c40247-q2-gu Not tainted 4.12.14-122.121-default #1 SLE12-SP5 [40072.537867] Hardware name: HP ProLiant DL580 Gen9/ProLiant DL580 Gen9, BIOS U17 11/23/2021 [40072.537999] task: ffff880433b38100 task.stack: ffffc90043d40000 [40072.538112] RIP: e030:xenvif_rx_skb+0x23/0x590 [xen_netback] [40072.538217] RSP: e02b:ffffc90043d43de0 EFLAGS: 00010246 [40072.538319] RAX: 0000000000000000 RBX: ffffc90043cd7cd0 RCX: 00000000000000f7 [40072.538430] RDX: 0000000000000000 RSI: 0000000000000006 RDI: ffffc90043d43df8 [40072.538531] RBP: 000000000000003f R08: 000077ff80000000 R09: 0000000000000008 [40072.538644] R10: 0000000000007ff0 R11: 00000000000008f6 R12: ffffc90043ce2708 [40072.538745] R13: 0000000000000000 R14: ffffc90043d43ed0 R15: ffff88043ea748c0 [40072.538861] FS: 0000000000000000(0000) GS:ffff880484600000(0000) knlGS:0000000000000000 [40072.538988] CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 [40072.539088] CR2: 0000000000000080 CR3: 0000000407ac8000 CR4: 0000000000040660 [40072.539211] Call Trace: [40072.539319] xenvif_rx_action+0x71/0x90 [xen_netback] [40072.539429] xenvif_kthread_guest_rx+0x14a/0x29c [xen_netback] Fix that by stopping the loop in case the rx queue becomes empty. Cc: stable@vger.kernel.org Fixes: 98f6d57ced73 ("xen-netback: process guest rx packets in batches") Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Paul Durrant Link: https://lore.kernel.org/r/20220713135322.19616-1-jgross@suse.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index dbac4c03d21a14..a0335407be4231 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -495,6 +495,7 @@ void xenvif_rx_action(struct xenvif_queue *queue) queue->rx_copy.completed = &completed_skbs; while (xenvif_rx_ring_slots_available(queue) && + !skb_queue_empty(&queue->rx_queue) && work_done < RX_BATCH_SIZE) { xenvif_rx_skb(queue); work_done++; From b856e5738b1c9a34fefb5a41753d4a03e2920eda Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 11 Jul 2022 18:16:25 +0200 Subject: [PATCH 0502/1056] fix race between exit_itimers() and /proc/pid/timers commit d5b36a4dbd06c5e8e36ca8ccc552f679069e2946 upstream. As Chris explains, the comment above exit_itimers() is not correct, we can race with proc_timers_seq_ops. Change exit_itimers() to clear signal->posix_timers with ->siglock held. Cc: Reported-by: chris@accessvector.net Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 2 +- include/linux/sched/task.h | 2 +- kernel/exit.c | 2 +- kernel/time/posix-timers.c | 19 ++++++++++++++----- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 29e865c59854a0..76196ddefbdd0c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1298,7 +1298,7 @@ int begin_new_exec(struct linux_binprm * bprm) bprm->mm = NULL; #ifdef CONFIG_POSIX_TIMERS - exit_itimers(me->signal); + exit_itimers(me); flush_itimer_signals(); #endif diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 8db25fcc1eba7c..caae8e045160d8 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -81,7 +81,7 @@ static inline void exit_thread(struct task_struct *tsk) extern void do_group_exit(int); extern void exit_files(struct task_struct *); -extern void exit_itimers(struct signal_struct *); +extern void exit_itimers(struct task_struct *); extern pid_t kernel_clone(struct kernel_clone_args *kargs); struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); diff --git a/kernel/exit.c b/kernel/exit.c index 91a43e57a32ebb..aefe7445508dbf 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -796,7 +796,7 @@ void __noreturn do_exit(long code) #ifdef CONFIG_POSIX_TIMERS hrtimer_cancel(&tsk->signal->real_timer); - exit_itimers(tsk->signal); + exit_itimers(tsk); #endif if (tsk->mm) setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 1cd10b102c51c3..5dead89308b742 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1051,15 +1051,24 @@ static void itimer_delete(struct k_itimer *timer) } /* - * This is called by do_exit or de_thread, only when there are no more - * references to the shared signal_struct. + * This is called by do_exit or de_thread, only when nobody else can + * modify the signal->posix_timers list. Yet we need sighand->siglock + * to prevent the race with /proc/pid/timers. */ -void exit_itimers(struct signal_struct *sig) +void exit_itimers(struct task_struct *tsk) { + struct list_head timers; struct k_itimer *tmr; - while (!list_empty(&sig->posix_timers)) { - tmr = list_entry(sig->posix_timers.next, struct k_itimer, list); + if (list_empty(&tsk->signal->posix_timers)) + return; + + spin_lock_irq(&tsk->sighand->siglock); + list_replace_init(&tsk->signal->posix_timers, &timers); + spin_unlock_irq(&tsk->sighand->siglock); + + while (!list_empty(&timers)) { + tmr = list_first_entry(&timers, struct k_itimer, list); itimer_delete(tmr); } } From 27056f20d7536c1f35c8ae1b4c1acc2f1415c4a5 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Fri, 10 Jun 2022 10:38:12 -0700 Subject: [PATCH 0503/1056] mm: userfaultfd: fix UFFDIO_CONTINUE on fallocated shmem pages commit 73f37dbcfe1763ee2294c7717a1f571e27d17fd8 upstream. When fallocate() is used on a shmem file, the pages we allocate can end up with !PageUptodate. Since UFFDIO_CONTINUE tries to find the existing page the user wants to map with SGP_READ, we would fail to find such a page, since shmem_getpage_gfp returns with a "NULL" pagep for SGP_READ if it discovers !PageUptodate. As a result, UFFDIO_CONTINUE returns -EFAULT, as it would do if the page wasn't found in the page cache at all. This isn't the intended behavior. UFFDIO_CONTINUE is just trying to find if a page exists, and doesn't care whether it still needs to be cleared or not. So, instead of SGP_READ, pass in SGP_NOALLOC. This is the same, except for one critical difference: in the !PageUptodate case, SGP_NOALLOC will clear the page and then return it. With this change, UFFDIO_CONTINUE works properly (succeeds) on a shmem file which has been fallocated, but otherwise not modified. Link: https://lkml.kernel.org/r/20220610173812.1768919-1-axelrasmussen@google.com Fixes: 153132571f02 ("userfaultfd/shmem: support UFFDIO_CONTINUE for shmem") Signed-off-by: Axel Rasmussen Acked-by: Peter Xu Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/userfaultfd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index c9bab48856c65c..3bbaf5f5353edb 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -227,7 +227,10 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm, struct page *page; int ret; - ret = shmem_getpage(inode, pgoff, &page, SGP_READ); + ret = shmem_getpage(inode, pgoff, &page, SGP_NOALLOC); + /* Our caller expects us to return -EFAULT if we failed to find page. */ + if (ret == -ENOENT) + ret = -EFAULT; if (ret) goto out; if (!page) { From e4967d22882bd63e3cd6e39c2d7b0848c28116a9 Mon Sep 17 00:00:00 2001 From: "Gowans, James" Date: Thu, 23 Jun 2022 05:24:03 +0000 Subject: [PATCH 0504/1056] mm: split huge PUD on wp_huge_pud fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 14c99d65941538aa33edd8dc7b1bbbb593c324a2 upstream. Currently the implementation will split the PUD when a fallback is taken inside the create_huge_pud function. This isn't where it should be done: the splitting should be done in wp_huge_pud, just like it's done for PMDs. Reason being that if a callback is taken during create, there is no PUD yet so nothing to split, whereas if a fallback is taken when encountering a write protection fault there is something to split. It looks like this was the original intention with the commit where the splitting was introduced, but somehow it got moved to the wrong place between v1 and v2 of the patch series. Rebase mistake perhaps. Link: https://lkml.kernel.org/r/6f48d622eb8bce1ae5dd75327b0b73894a2ec407.camel@amazon.com Fixes: 327e9fd48972 ("mm: Split huge pages on write-notify or COW") Signed-off-by: James Gowans Reviewed-by: Thomas Hellström Cc: Christian König Cc: Jan H. Schönherr Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 26d115ded4ab7b..036eb765c4e808 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4487,6 +4487,19 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf) static vm_fault_t create_huge_pud(struct vm_fault *vmf) { +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ + defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) + /* No support for anonymous transparent PUD pages yet */ + if (vma_is_anonymous(vmf->vma)) + return VM_FAULT_FALLBACK; + if (vmf->vma->vm_ops->huge_fault) + return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + return VM_FAULT_FALLBACK; +} + +static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) +{ #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) /* No support for anonymous transparent PUD pages yet */ @@ -4501,19 +4514,7 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf) split: /* COW or write-notify not handled on PUD level: split pud.*/ __split_huge_pud(vmf->vma, vmf->pud, vmf->address); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - return VM_FAULT_FALLBACK; -} - -static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) -{ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - /* No support for anonymous transparent PUD pages yet */ - if (vma_is_anonymous(vmf->vma)) - return VM_FAULT_FALLBACK; - if (vmf->vma->vm_ops->huge_fault) - return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ return VM_FAULT_FALLBACK; } From 22eeff55679d9e7c0f768c79bfbd83e2f8142d89 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Mon, 11 Jul 2022 09:47:31 +0800 Subject: [PATCH 0505/1056] tracing/histograms: Fix memory leak problem commit 7edc3945bdce9c39198a10d6129377a5c53559c2 upstream. This reverts commit 46bbe5c671e06f070428b9be142cc4ee5cedebac. As commit 46bbe5c671e0 ("tracing: fix double free") said, the "double free" problem reported by clang static analyzer is: > In parse_var_defs() if there is a problem allocating > var_defs.expr, the earlier var_defs.name is freed. > This free is duplicated by free_var_defs() which frees > the rest of the list. However, if there is a problem allocating N-th var_defs.expr: + in parse_var_defs(), the freed 'earlier var_defs.name' is actually the N-th var_defs.name; + then in free_var_defs(), the names from 0th to (N-1)-th are freed; IF ALLOCATING PROBLEM HAPPENED HERE!!! -+ \ | 0th 1th (N-1)-th N-th V +-------------+-------------+-----+-------------+----------- var_defs: | name | expr | name | expr | ... | name | expr | name | /// +-------------+-------------+-----+-------------+----------- These two frees don't act on same name, so there was no "double free" problem before. Conversely, after that commit, we get a "memory leak" problem because the above "N-th var_defs.name" is not freed. If enable CONFIG_DEBUG_KMEMLEAK and inject a fault at where the N-th var_defs.expr allocated, then execute on shell like: $ echo 'hist:key=call_site:val=$v1,$v2:v1=bytes_req,v2=bytes_alloc' > \ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger Then kmemleak reports: unreferenced object 0xffff8fb100ef3518 (size 8): comm "bash", pid 196, jiffies 4295681690 (age 28.538s) hex dump (first 8 bytes): 76 31 00 00 b1 8f ff ff v1...... backtrace: [<0000000038fe4895>] kstrdup+0x2d/0x60 [<00000000c99c049a>] event_hist_trigger_parse+0x206f/0x20e0 [<00000000ae70d2cc>] trigger_process_regex+0xc0/0x110 [<0000000066737a4c>] event_trigger_write+0x75/0xd0 [<000000007341e40c>] vfs_write+0xbb/0x2a0 [<0000000087fde4c2>] ksys_write+0x59/0xd0 [<00000000581e9cdf>] do_syscall_64+0x3a/0x80 [<00000000cf3b065c>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Link: https://lkml.kernel.org/r/20220711014731.69520-1-zhengyejian1@huawei.com Cc: stable@vger.kernel.org Fixes: 46bbe5c671e0 ("tracing: fix double free") Reported-by: Hulk Robot Suggested-by: Steven Rostedt Reviewed-by: Tom Zanussi Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_hist.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 45a78e9c65c96d..d5c7b9a37ed53a 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -4056,6 +4056,8 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) s = kstrdup(field_str, GFP_KERNEL); if (!s) { + kfree(hist_data->attrs->var_defs.name[n_vars]); + hist_data->attrs->var_defs.name[n_vars] = NULL; ret = -ENOMEM; goto free; } From 1eb4bea3af81286a8c4860248d08445e8e71d943 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 6 Jul 2022 10:50:40 -0400 Subject: [PATCH 0506/1056] net: sock: tracing: Fix sock_exceed_buf_limit not to dereference stale pointer commit 820b8963adaea34a87abbecb906d1f54c0aabfb7 upstream. The trace event sock_exceed_buf_limit saves the prot->sysctl_mem pointer and then dereferences it in the TP_printk() portion. This is unsafe as the TP_printk() portion is executed at the time the buffer is read. That is, it can be seconds, minutes, days, months, even years later. If the proto is freed, then this dereference will can also lead to a kernel crash. Instead, save the sysctl_mem array into the ring buffer and have the TP_printk() reference that instead. This is the proper and safe way to read pointers in trace events. Link: https://lore.kernel.org/all/20220706052130.16368-12-kuniyu@amazon.com/ Cc: stable@vger.kernel.org Fixes: 3847ce32aea9f ("core: add tracepoints for queueing skb to rcvbuf") Signed-off-by: Steven Rostedt (Google) Acked-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/trace/events/sock.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index 12c315782766a6..777ee6cbe93302 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -98,7 +98,7 @@ TRACE_EVENT(sock_exceed_buf_limit, TP_STRUCT__entry( __array(char, name, 32) - __field(long *, sysctl_mem) + __array(long, sysctl_mem, 3) __field(long, allocated) __field(int, sysctl_rmem) __field(int, rmem_alloc) @@ -110,7 +110,9 @@ TRACE_EVENT(sock_exceed_buf_limit, TP_fast_assign( strncpy(__entry->name, prot->name, 32); - __entry->sysctl_mem = prot->sysctl_mem; + __entry->sysctl_mem[0] = READ_ONCE(prot->sysctl_mem[0]); + __entry->sysctl_mem[1] = READ_ONCE(prot->sysctl_mem[1]); + __entry->sysctl_mem[2] = READ_ONCE(prot->sysctl_mem[2]); __entry->allocated = allocated; __entry->sysctl_rmem = sk_get_rmem0(sk, prot); __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); From 6f497564bf6a5940eef27e688be6fa647261a6fd Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 13 Jul 2022 13:48:52 +0200 Subject: [PATCH 0507/1056] ip: fix dflt addr selection for connected nexthop commit 747c14307214b55dbd8250e1ab44cad8305756f1 upstream. When a nexthop is added, without a gw address, the default scope was set to 'host'. Thus, when a source address is selected, 127.0.0.1 may be chosen but rejected when the route is used. When using a route without a nexthop id, the scope can be configured in the route, thus the problem doesn't exist. To explain more deeply: when a user creates a nexthop, it cannot specify the scope. To create it, the function nh_create_ipv4() calls fib_check_nh() with scope set to 0. fib_check_nh() calls fib_check_nh_nongw() wich was setting scope to 'host'. Then, nh_create_ipv4() calls fib_info_update_nhc_saddr() with scope set to 'host'. The src addr is chosen before the route is inserted. When a 'standard' route (ie without a reference to a nexthop) is added, fib_create_info() calls fib_info_update_nhc_saddr() with the scope set by the user. iproute2 set the scope to 'link' by default. Here is a way to reproduce the problem: ip netns add foo ip -n foo link set lo up ip netns add bar ip -n bar link set lo up sleep 1 ip -n foo link add name eth0 type dummy ip -n foo link set eth0 up ip -n foo address add 192.168.0.1/24 dev eth0 ip -n foo link add name veth0 type veth peer name veth1 netns bar ip -n foo link set veth0 up ip -n bar link set veth1 up ip -n bar address add 192.168.1.1/32 dev veth1 ip -n bar route add default dev veth1 ip -n foo nexthop add id 1 dev veth0 ip -n foo route add 192.168.1.1 nhid 1 Try to get/use the route: > $ ip -n foo route get 192.168.1.1 > RTNETLINK answers: Invalid argument > $ ip netns exec foo ping -c1 192.168.1.1 > ping: connect: Invalid argument Try without nexthop group (iproute2 sets scope to 'link' by dflt): ip -n foo route del 192.168.1.1 ip -n foo route add 192.168.1.1 dev veth0 Try to get/use the route: > $ ip -n foo route get 192.168.1.1 > 192.168.1.1 dev veth0 src 192.168.0.1 uid 0 > cache > $ ip netns exec foo ping -c1 192.168.1.1 > PING 192.168.1.1 (192.168.1.1) 56(84) bytes of data. > 64 bytes from 192.168.1.1: icmp_seq=1 ttl=64 time=0.039 ms > > --- 192.168.1.1 ping statistics --- > 1 packets transmitted, 1 received, 0% packet loss, time 0ms > rtt min/avg/max/mdev = 0.039/0.039/0.039/0.000 ms CC: stable@vger.kernel.org Fixes: 597cfe4fc339 ("nexthop: Add support for IPv4 nexthops") Reported-by: Edwin Brossette Signed-off-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20220713114853.29406-1-nicolas.dichtel@6wind.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b5563f5ff17657..a98350dacbc3fd 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1228,7 +1228,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, nh->fib_nh_dev = in_dev->dev; dev_hold(nh->fib_nh_dev); - nh->fib_nh_scope = RT_SCOPE_HOST; + nh->fib_nh_scope = RT_SCOPE_LINK; if (!netif_carrier_ok(nh->fib_nh_dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = 0; From a4f5e3a22fbd04df23f086f637821b8f4c677b08 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 28 Jun 2022 08:55:45 +0100 Subject: [PATCH 0508/1056] ARM: 9213/1: Print message about disabled Spectre workarounds only once commit e4ced82deb5fb17222fb82e092c3f8311955b585 upstream. Print the message about disabled Spectre workarounds only once. The message is printed each time CPU goes out from idling state on NVIDIA Tegra boards, causing storm in KMSG that makes system unusable. Cc: stable@vger.kernel.org Signed-off-by: Dmitry Osipenko Signed-off-by: Russell King (Oracle) Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/proc-v7-bugs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index fb9f3eb6bf483d..f9730eba063285 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -108,8 +108,7 @@ static unsigned int spectre_v2_install_workaround(unsigned int method) #else static unsigned int spectre_v2_install_workaround(unsigned int method) { - pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n", - smp_processor_id()); + pr_info_once("Spectre V2: workarounds disabled by configuration\n"); return SPECTRE_VULNERABLE; } From 2a098504d7a03f51abe4baed581aab193ef83bed Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 30 Jun 2022 16:46:54 +0100 Subject: [PATCH 0509/1056] ARM: 9214/1: alignment: advance IT state after emulating Thumb instruction commit e5c46fde75e43c15a29b40e5fc5641727f97ae47 upstream. After emulating a misaligned load or store issued in Thumb mode, we have to advance the IT state by hand, or it will get out of sync with the actual instruction stream, which means we'll end up applying the wrong condition code to subsequent instructions. This might corrupt the program state rather catastrophically. So borrow the it_advance() helper from the probing code, and use it on CPSR if the emulated instruction is Thumb. Cc: Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/ptrace.h | 26 ++++++++++++++++++++++++++ arch/arm/mm/alignment.c | 3 +++ arch/arm/probes/decode.h | 26 +------------------------- 3 files changed, 30 insertions(+), 25 deletions(-) diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 93051e2f402c84..1408a6a15d0e03 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -163,5 +163,31 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) ((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1; \ }) + +/* + * Update ITSTATE after normal execution of an IT block instruction. + * + * The 8 IT state bits are split into two parts in CPSR: + * ITSTATE<1:0> are in CPSR<26:25> + * ITSTATE<7:2> are in CPSR<15:10> + */ +static inline unsigned long it_advance(unsigned long cpsr) +{ + if ((cpsr & 0x06000400) == 0) { + /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ + cpsr &= ~PSR_IT_MASK; + } else { + /* We need to shift left ITSTATE<4:0> */ + const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ + unsigned long it = cpsr & mask; + it <<= 1; + it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ + it &= mask; + cpsr &= ~mask; + cpsr |= it; + } + return cpsr; +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index ea81e89e77400e..bcefe3f51744c6 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -935,6 +935,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (type == TYPE_LDST) do_alignment_finish_ldst(addr, instr, regs, offset); + if (thumb_mode(regs)) + regs->ARM_cpsr = it_advance(regs->ARM_cpsr); + return 0; bad_or_fault: diff --git a/arch/arm/probes/decode.h b/arch/arm/probes/decode.h index 97317359899218..facc889d05eeea 100644 --- a/arch/arm/probes/decode.h +++ b/arch/arm/probes/decode.h @@ -14,6 +14,7 @@ #include #include #include +#include #include void __init arm_probes_decode_init(void); @@ -35,31 +36,6 @@ void __init find_str_pc_offset(void); #endif -/* - * Update ITSTATE after normal execution of an IT block instruction. - * - * The 8 IT state bits are split into two parts in CPSR: - * ITSTATE<1:0> are in CPSR<26:25> - * ITSTATE<7:2> are in CPSR<15:10> - */ -static inline unsigned long it_advance(unsigned long cpsr) - { - if ((cpsr & 0x06000400) == 0) { - /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ - cpsr &= ~PSR_IT_MASK; - } else { - /* We need to shift left ITSTATE<4:0> */ - const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ - unsigned long it = cpsr & mask; - it <<= 1; - it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ - it &= mask; - cpsr &= ~mask; - cpsr |= it; - } - return cpsr; -} - static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs) { long cpsr = regs->ARM_cpsr; From 5a9df31017999197b6e60535940f2f2fe1bd3b0d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 2 Jul 2022 16:52:27 +0200 Subject: [PATCH 0510/1056] wifi: mac80211: fix queue selection for mesh/OCB interfaces commit 50e2ab39291947b6c6c7025cf01707c270fcde59 upstream. When using iTXQ, the code assumes that there is only one vif queue for broadcast packets, using the BE queue. Allowing non-BE queue marking violates that assumption and txq->ac == skb_queue_mapping is no longer guaranteed. This can cause issues with queue handling in the driver and also causes issues with the recent ATF change, resulting in an AQL underflow warning. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20220702145227.39356-1-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/wme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 62c6733e079232..d50480b3175055 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -147,8 +147,8 @@ u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, bool qos; /* all mesh/ocb stations are required to support WME */ - if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || - sdata->vif.type == NL80211_IFTYPE_OCB) + if (sta && (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || + sdata->vif.type == NL80211_IFTYPE_OCB)) qos = true; else if (sta) qos = sta->sta.wme; From 54aee4e5ce8c21555286a6333e46c1713880cf93 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 13 Jun 2022 12:19:50 -1000 Subject: [PATCH 0511/1056] cgroup: Use separate src/dst nodes when preloading css_sets for migration commit 07fd5b6cdf3cc30bfde8fe0f644771688be04447 upstream. Each cset (css_set) is pinned by its tasks. When we're moving tasks around across csets for a migration, we need to hold the source and destination csets to ensure that they don't go away while we're moving tasks about. This is done by linking cset->mg_preload_node on either the mgctx->preloaded_src_csets or mgctx->preloaded_dst_csets list. Using the same cset->mg_preload_node for both the src and dst lists was deemed okay as a cset can't be both the source and destination at the same time. Unfortunately, this overloading becomes problematic when multiple tasks are involved in a migration and some of them are identity noop migrations while others are actually moving across cgroups. For example, this can happen with the following sequence on cgroup1: #1> mkdir -p /sys/fs/cgroup/misc/a/b #2> echo $$ > /sys/fs/cgroup/misc/a/cgroup.procs #3> RUN_A_COMMAND_WHICH_CREATES_MULTIPLE_THREADS & #4> PID=$! #5> echo $PID > /sys/fs/cgroup/misc/a/b/tasks #6> echo $PID > /sys/fs/cgroup/misc/a/cgroup.procs the process including the group leader back into a. In this final migration, non-leader threads would be doing identity migration while the group leader is doing an actual one. After #3, let's say the whole process was in cset A, and that after #4, the leader moves to cset B. Then, during #6, the following happens: 1. cgroup_migrate_add_src() is called on B for the leader. 2. cgroup_migrate_add_src() is called on A for the other threads. 3. cgroup_migrate_prepare_dst() is called. It scans the src list. 4. It notices that B wants to migrate to A, so it tries to A to the dst list but realizes that its ->mg_preload_node is already busy. 5. and then it notices A wants to migrate to A as it's an identity migration, it culls it by list_del_init()'ing its ->mg_preload_node and putting references accordingly. 6. The rest of migration takes place with B on the src list but nothing on the dst list. This means that A isn't held while migration is in progress. If all tasks leave A before the migration finishes and the incoming task pins it, the cset will be destroyed leading to use-after-free. This is caused by overloading cset->mg_preload_node for both src and dst preload lists. We wanted to exclude the cset from the src list but ended up inadvertently excluding it from the dst list too. This patch fixes the issue by separating out cset->mg_preload_node into ->mg_src_preload_node and ->mg_dst_preload_node, so that the src and dst preloadings don't interfere with each other. Signed-off-by: Tejun Heo Reported-by: Mukesh Ojha Reported-by: shisiyuan Link: http://lkml.kernel.org/r/1654187688-27411-1-git-send-email-shisiyuan@xiaomi.com Link: https://www.spinics.net/lists/cgroups/msg33313.html Fixes: f817de98513d ("cgroup: prepare migration path for unified hierarchy") Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Greg Kroah-Hartman --- include/linux/cgroup-defs.h | 3 ++- kernel/cgroup/cgroup.c | 37 +++++++++++++++++++++++-------------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index db2e147e069fe5..cd8b8bd5ec4d5a 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -264,7 +264,8 @@ struct css_set { * List of csets participating in the on-going migration either as * source or destination. Protected by cgroup_mutex. */ - struct list_head mg_preload_node; + struct list_head mg_src_preload_node; + struct list_head mg_dst_preload_node; struct list_head mg_node; /* diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index de8b4fa1e1fd68..e7c3b0e586f205 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -764,7 +764,8 @@ struct css_set init_css_set = { .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets), .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), - .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), + .mg_src_preload_node = LIST_HEAD_INIT(init_css_set.mg_src_preload_node), + .mg_dst_preload_node = LIST_HEAD_INIT(init_css_set.mg_dst_preload_node), .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), /* @@ -1239,7 +1240,8 @@ static struct css_set *find_css_set(struct css_set *old_cset, INIT_LIST_HEAD(&cset->threaded_csets); INIT_HLIST_NODE(&cset->hlist); INIT_LIST_HEAD(&cset->cgrp_links); - INIT_LIST_HEAD(&cset->mg_preload_node); + INIT_LIST_HEAD(&cset->mg_src_preload_node); + INIT_LIST_HEAD(&cset->mg_dst_preload_node); INIT_LIST_HEAD(&cset->mg_node); /* Copy the set of subsystem state objects generated in @@ -2596,21 +2598,27 @@ int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp) */ void cgroup_migrate_finish(struct cgroup_mgctx *mgctx) { - LIST_HEAD(preloaded); struct css_set *cset, *tmp_cset; lockdep_assert_held(&cgroup_mutex); spin_lock_irq(&css_set_lock); - list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded); - list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded); + list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_src_csets, + mg_src_preload_node) { + cset->mg_src_cgrp = NULL; + cset->mg_dst_cgrp = NULL; + cset->mg_dst_cset = NULL; + list_del_init(&cset->mg_src_preload_node); + put_css_set_locked(cset); + } - list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) { + list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_dst_csets, + mg_dst_preload_node) { cset->mg_src_cgrp = NULL; cset->mg_dst_cgrp = NULL; cset->mg_dst_cset = NULL; - list_del_init(&cset->mg_preload_node); + list_del_init(&cset->mg_dst_preload_node); put_css_set_locked(cset); } @@ -2652,7 +2660,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); - if (!list_empty(&src_cset->mg_preload_node)) + if (!list_empty(&src_cset->mg_src_preload_node)) return; WARN_ON(src_cset->mg_src_cgrp); @@ -2663,7 +2671,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, src_cset->mg_src_cgrp = src_cgrp; src_cset->mg_dst_cgrp = dst_cgrp; get_css_set(src_cset); - list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets); + list_add_tail(&src_cset->mg_src_preload_node, &mgctx->preloaded_src_csets); } /** @@ -2688,7 +2696,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) /* look up the dst cset for each src cset and link it to src */ list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets, - mg_preload_node) { + mg_src_preload_node) { struct css_set *dst_cset; struct cgroup_subsys *ss; int ssid; @@ -2707,7 +2715,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) if (src_cset == dst_cset) { src_cset->mg_src_cgrp = NULL; src_cset->mg_dst_cgrp = NULL; - list_del_init(&src_cset->mg_preload_node); + list_del_init(&src_cset->mg_src_preload_node); put_css_set(src_cset); put_css_set(dst_cset); continue; @@ -2715,8 +2723,8 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) src_cset->mg_dst_cset = dst_cset; - if (list_empty(&dst_cset->mg_preload_node)) - list_add_tail(&dst_cset->mg_preload_node, + if (list_empty(&dst_cset->mg_dst_preload_node)) + list_add_tail(&dst_cset->mg_dst_preload_node, &mgctx->preloaded_dst_csets); else put_css_set(dst_cset); @@ -2962,7 +2970,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) goto out_finish; spin_lock_irq(&css_set_lock); - list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) { + list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, + mg_src_preload_node) { struct task_struct *task, *ntask; /* all tasks in src_csets need to be migrated */ From 531a140e269de71d0ad14957e67c1a2650b83f08 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 4 Jul 2022 12:42:03 +0100 Subject: [PATCH 0512/1056] btrfs: return -EAGAIN for NOWAIT dio reads/writes on compressed and inline extents commit a4527e1853f8ff6e0b7c2dadad6268bd38427a31 upstream. When doing a direct IO read or write, we always return -ENOTBLK when we find a compressed extent (or an inline extent) so that we fallback to buffered IO. This however is not ideal in case we are in a NOWAIT context (io_uring for example), because buffered IO can block and we currently have no support for NOWAIT semantics for buffered IO, so if we need to fallback to buffered IO we should first signal the caller that we may need to block by returning -EAGAIN instead. This behaviour can also result in short reads being returned to user space, which although it's not incorrect and user space should be able to deal with partial reads, it's somewhat surprising and even some popular applications like QEMU (Link tag #1) and MariaDB (Link tag #2) don't deal with short reads properly (or at all). The short read case happens when we try to read from a range that has a non-compressed and non-inline extent followed by a compressed extent. After having read the first extent, when we find the compressed extent we return -ENOTBLK from btrfs_dio_iomap_begin(), which results in iomap to treat the request as a short read, returning 0 (success) and waiting for previously submitted bios to complete (this happens at fs/iomap/direct-io.c:__iomap_dio_rw()). After that, and while at btrfs_file_read_iter(), we call filemap_read() to use buffered IO to read the remaining data, and pass it the number of bytes we were able to read with direct IO. Than at filemap_read() if we get a page fault error when accessing the read buffer, we return a partial read instead of an -EFAULT error, because the number of bytes previously read is greater than zero. So fix this by returning -EAGAIN for NOWAIT direct IO when we find a compressed or an inline extent. Reported-by: Dominique MARTINET Link: https://lore.kernel.org/linux-btrfs/YrrFGO4A1jS0GI0G@atmark-techno.com/ Link: https://jira.mariadb.org/browse/MDEV-27900?focusedCommentId=216582&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-216582 Tested-by: Dominique MARTINET CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Christoph Hellwig Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d644dcaf300404..ea726205079015 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7957,7 +7957,19 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || em->block_start == EXTENT_MAP_INLINE) { free_extent_map(em); - ret = -ENOTBLK; + /* + * If we are in a NOWAIT context, return -EAGAIN in order to + * fallback to buffered IO. This is not only because we can + * block with buffered IO (no support for NOWAIT semantics at + * the moment) but also to avoid returning short reads to user + * space - this happens if we were able to read some data from + * previous non-compressed extents and then when we fallback to + * buffered IO, at btrfs_file_read_iter() by calling + * filemap_read(), we fail to fault in pages for the read buffer, + * in which case filemap_read() returns a short read (the number + * of bytes previously read is > 0, so it does not return -EFAULT). + */ + ret = (flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOTBLK; goto unlock_err; } From fbe7451a3adae0d5302f227e730eff21a41b921a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 30 Jun 2022 23:06:00 +0300 Subject: [PATCH 0513/1056] drm/panfrost: Put mapping instead of shmem obj on panfrost_mmu_map_fault_addr() error commit fb6e0637ab7ebd8e61fe24f4d663c4bae99cfa62 upstream. When panfrost_mmu_map_fault_addr() fails, the BO's mapping should be unreferenced and not the shmem object which backs the mapping. Cc: stable@vger.kernel.org Fixes: bdefca2d8dc0 ("drm/panfrost: Add the panfrost_gem_mapping concept") Reviewed-by: Steven Price Signed-off-by: Dmitry Osipenko Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20220630200601.1884120-2-dmitry.osipenko@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/panfrost/panfrost_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index dfe5f1d2976367..c0189cc9a2f17a 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -501,7 +501,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, err_pages: drm_gem_shmem_put_pages(&bo->base); err_bo: - drm_gem_object_put(&bo->base.base); + panfrost_gem_mapping_put(bomapping); return ret; } From 1807d8867402a58b831a7fc16832747ff559a0d1 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 30 Jun 2022 23:06:01 +0300 Subject: [PATCH 0514/1056] drm/panfrost: Fix shrinker list corruption by madvise IOCTL commit 9fc33eaaa979d112d10fea729edcd2a2e21aa912 upstream. Calling madvise IOCTL twice on BO causes memory shrinker list corruption and crashes kernel because BO is already on the list and it's added to the list again, while BO should be removed from the list before it's re-added. Fix it. Cc: stable@vger.kernel.org Fixes: 013b65101315 ("drm/panfrost: Add madvise and shrinker support") Acked-by: Alyssa Rosenzweig Reviewed-by: Steven Price Signed-off-by: Dmitry Osipenko Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20220630200601.1884120-3-dmitry.osipenko@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/panfrost/panfrost_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 1ffaef5ec5ff51..de533f37276480 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -422,8 +422,8 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data, if (args->retained) { if (args->madv == PANFROST_MADV_DONTNEED) - list_add_tail(&bo->base.madv_list, - &pfdev->shrinker_list); + list_move_tail(&bo->base.madv_list, + &pfdev->shrinker_list); else if (args->madv == PANFROST_MADV_WILLNEED) list_del_init(&bo->base.madv_list); } From ca58387e7ad1b7e14b41bc6669c2432568f10b84 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 13 Jul 2022 17:49:15 +1000 Subject: [PATCH 0515/1056] fs/remap: constrain dedupe of EOF blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5750676b64a561f7ec920d7c6ba130fc9c7378f3 upstream. If dedupe of an EOF block is not constrainted to match against only other EOF blocks with the same EOF offset into the block, it can match against any other block that has the same matching initial bytes in it, even if the bytes beyond EOF in the source file do not match. Fix this by constraining the EOF block matching to only match against other EOF blocks that have identical EOF offsets and data. This allows "whole file dedupe" to continue to work without allowing eof blocks to randomly match against partial full blocks with the same data. Reported-by: Ansgar Lößer Fixes: 1383a7ed6749 ("vfs: check file ranges before cloning files") Link: https://lore.kernel.org/linux-fsdevel/a7c93559-4ba1-df2f-7a85-55a143696405@tu-darmstadt.de/ Signed-off-by: Dave Chinner Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/remap_range.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/remap_range.c b/fs/remap_range.c index 6d4a9beaa09743..e69bafb96f093b 100644 --- a/fs/remap_range.c +++ b/fs/remap_range.c @@ -71,7 +71,8 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in, * Otherwise, make sure the count is also block-aligned, having * already confirmed the starting offsets' block alignment. */ - if (pos_in + count == size_in) { + if (pos_in + count == size_in && + (!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) { bcount = ALIGN(size_in, bs) - pos_in; } else { if (!IS_ALIGNED(count, bs)) From bb676a80c6ebce605031442a8d87e2c1e1edde6b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 23 Jun 2022 17:54:01 +0900 Subject: [PATCH 0516/1056] nilfs2: fix incorrect masking of permission flags for symlinks commit 5924e6ec1585445f251ea92713eb15beb732622a upstream. The permission flags of newly created symlinks are wrongly dropped on nilfs2 with the current umask value even though symlinks should have 777 (rwxrwxrwx) permissions: $ umask 0022 $ touch file && ln -s file symlink; ls -l file symlink -rw-r--r--. 1 root root 0 Jun 23 16:29 file lrwxr-xr-x. 1 root root 4 Jun 23 16:29 symlink -> file This fixes the bug by inserting a missing check that excludes symlinks. Link: https://lkml.kernel.org/r/1655974441-5612-1-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: Tommy Pettersson Reported-by: Ciprian Craciun Tested-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/nilfs.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 7dcb77d3875933..aceb8aadca1487 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -198,6 +198,9 @@ static inline int nilfs_acl_chmod(struct inode *inode) static inline int nilfs_init_acl(struct inode *inode, struct inode *dir) { + if (S_ISLNK(inode->i_mode)) + return 0; + inode->i_mode &= ~current_umask(); return 0; } From d69f9ff4c8ab157f4568d6fbf613dd5683b4c033 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 20 Jun 2022 09:01:43 +0200 Subject: [PATCH 0517/1056] sh: convert nommu io{re,un}map() to static inline functions commit d684e0a52d36f8939eda30a0f31ee235ee4ee741 upstream. Recently, nommu iounmap() was converted from a static inline function to a macro again, basically reverting commit 4580ba4ad2e6b8dd ("sh: Convert iounmap() macros to inline functions"). With -Werror, this leads to build failures like: drivers/iio/adc/xilinx-ams.c: In function `ams_iounmap_ps': drivers/iio/adc/xilinx-ams.c:1195:14: error: unused variable `ams' [-Werror=unused-variable] 1195 | struct ams *ams = data; | ^~~ Fix this by replacing the macros for ioremap() and iounmap() by static inline functions, based on . Link: https://lkml.kernel.org/r/8d1b1766260961799b04035e7bc39a7f59729f72.1655708312.git.geert+renesas@glider.be Fixes: 13f1fc870dd74713 ("sh: move the ioremap implementation out of line") Signed-off-by: Geert Uytterhoeven Reported-by: kernel test robot Reported-by: Jonathan Cameron Acked-by: Jonathan Cameron Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/sh/include/asm/io.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index cf9a3ec32406f8..fba90e670ed41d 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -271,8 +271,12 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, #endif /* CONFIG_HAVE_IOREMAP_PROT */ #else /* CONFIG_MMU */ -#define iounmap(addr) do { } while (0) -#define ioremap(offset, size) ((void __iomem *)(unsigned long)(offset)) +static inline void __iomem *ioremap(phys_addr_t offset, size_t size) +{ + return (void __iomem *)(unsigned long)offset; +} + +static inline void iounmap(volatile void __iomem *addr) { } #endif /* CONFIG_MMU */ #define ioremap_uc ioremap From fb593531571c93cc1595a0ad10474de2a72d1ec7 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 27 May 2022 19:17:26 +0800 Subject: [PATCH 0518/1056] Revert "evm: Fix memleak in init_desc" commit 51dd64bb99e4478fc5280171acd8e1b529eadaf7 upstream. This reverts commit ccf11dbaa07b328fa469415c362d33459c140a37. Commit ccf11dbaa07b ("evm: Fix memleak in init_desc") said there is memleak in init_desc. That may be incorrect, as we can see, tmp_tfm is saved in one of the two global variables hmac_tfm or evm_tfm[hash_algo], then if init_desc is called next time, there is no need to alloc tfm again, so in the error path of kmalloc desc or crypto_shash_init(desc), It is not a problem without freeing tmp_tfm. And also that commit did not reset the global variable to NULL after freeing tmp_tfm and this makes *tfm a dangling pointer which may cause a UAF issue. Reported-by: Guozihua (Scott) Signed-off-by: Xiu Jianfeng Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/evm/evm_crypto.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index 0450d79afdc8fc..b862f0f919bfce 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -75,7 +75,7 @@ static struct shash_desc *init_desc(char type, uint8_t hash_algo) { long rc; const char *algo; - struct crypto_shash **tfm, *tmp_tfm = NULL; + struct crypto_shash **tfm, *tmp_tfm; struct shash_desc *desc; if (type == EVM_XATTR_HMAC) { @@ -120,16 +120,13 @@ static struct shash_desc *init_desc(char type, uint8_t hash_algo) alloc: desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(*tfm), GFP_KERNEL); - if (!desc) { - crypto_free_shash(tmp_tfm); + if (!desc) return ERR_PTR(-ENOMEM); - } desc->tfm = *tfm; rc = crypto_shash_init(desc); if (rc) { - crypto_free_shash(tmp_tfm); kfree(desc); return ERR_PTR(rc); } From ea22fcd0324de361057e0408fd61781f58f8252e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 14 Jul 2022 15:23:39 -0700 Subject: [PATCH 0519/1056] xfs: only run COW extent recovery when there are no live extents [ Upstream commit 7993f1a431bc5271369d359941485a9340658ac3 ] As part of multiple customer escalations due to file data corruption after copy on write operations, I wrote some fstests that use fsstress to hammer on COW to shake things loose. Regrettably, I caught some filesystem shutdowns due to incorrect rmap operations with the following loop: mount # (0) fsstress & # (1) while true; do fsstress mount -o remount,ro # (2) fsstress mount -o remount,rw # (3) done When (2) happens, notice that (1) is still running. xfs_remount_ro will call xfs_blockgc_stop to walk the inode cache to free all the COW extents, but the blockgc mechanism races with (1)'s reader threads to take IOLOCKs and loses, which means that it doesn't clean them all out. Call such a file (A). When (3) happens, xfs_remount_rw calls xfs_reflink_recover_cow, which walks the ondisk refcount btree and frees any COW extent that it finds. This function does not check the inode cache, which means that incore COW forks of inode (A) is now inconsistent with the ondisk metadata. If one of those former COW extents are allocated and mapped into another file (B) and someone triggers a COW to the stale reservation in (A), A's dirty data will be written into (B) and once that's done, those blocks will be transferred to (A)'s data fork without bumping the refcount. The results are catastrophic -- file (B) and the refcount btree are now corrupt. In the first patch, we fixed the race condition in (2) so that (A) will always flush the COW fork. In this second patch, we move the _recover_cow call to the initial mount call in (0) for safety. As mentioned previously, xfs_reflink_recover_cow walks the refcount btree looking for COW staging extents, and frees them. This was intended to be run at mount time (when we know there are no live inodes) to clean up any leftover staging events that may have been left behind during an unclean shutdown. As a time "optimization" for readonly mounts, we deferred this to the ro->rw transition, not realizing that any failure to clean all COW forks during a rw->ro transition would result in catastrophic corruption. Therefore, remove this optimization and only run the recovery routine when we're guaranteed not to have any COW staging extents anywhere, which means we always run this at mount time. While we're at it, move the callsite to xfs_log_mount_finish because any refcount btree expansion (however unlikely given that we're removing records from the right side of the index) must be fed by a per-AG reservation, which doesn't exist in its current location. Fixes: 174edb0e46e5 ("xfs: store in-progress CoW allocations in the refcount btree") Signed-off-by: Darrick J. Wong Reviewed-by: Chandan Babu R Reviewed-by: Dave Chinner Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log_recover.c | 24 +++++++++++++++++++++++- fs/xfs/xfs_mount.c | 10 ---------- fs/xfs/xfs_reflink.c | 5 ++++- fs/xfs/xfs_super.c | 9 --------- 4 files changed, 27 insertions(+), 21 deletions(-) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 10562ecbd9eac8..581aeb288b32b6 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -27,7 +27,7 @@ #include "xfs_buf_item.h" #include "xfs_ag.h" #include "xfs_quota.h" - +#include "xfs_reflink.h" #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) @@ -3502,6 +3502,28 @@ xlog_recover_finish( xlog_recover_process_iunlinks(log); xlog_recover_check_summary(log); + + /* + * Recover any CoW staging blocks that are still referenced by the + * ondisk refcount metadata. During mount there cannot be any live + * staging extents as we have not permitted any user modifications. + * Therefore, it is safe to free them all right now, even on a + * read-only mount. + */ + error = xfs_reflink_recover_cow(log->l_mp); + if (error) { + xfs_alert(log->l_mp, + "Failed to recover leftover CoW staging extents, err %d.", + error); + /* + * If we get an error here, make sure the log is shut down + * but return zero so that any log items committed since the + * end of intents processing can be pushed through the CIL + * and AIL. + */ + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); + } + return 0; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 06dac09eddbd80..62f3c153d4b29e 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -922,15 +922,6 @@ xfs_mountfs( xfs_warn(mp, "Unable to allocate reserve blocks. Continuing without reserve pool."); - /* Recover any CoW blocks that never got remapped. */ - error = xfs_reflink_recover_cow(mp); - if (error) { - xfs_err(mp, - "Error %d recovering leftover CoW allocations.", error); - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - goto out_quota; - } - /* Reserve AG blocks for future btree expansion. */ error = xfs_fs_reserve_ag_blocks(mp); if (error && error != -ENOSPC) @@ -941,7 +932,6 @@ xfs_mountfs( out_agresv: xfs_fs_unreserve_ag_blocks(mp); - out_quota: xfs_qm_unmount_quotas(mp); out_rtunmount: xfs_rtunmount_inodes(mp); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 76355f29348840..36832e4bc803ca 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -749,7 +749,10 @@ xfs_reflink_end_cow( } /* - * Free leftover CoW reservations that didn't get cleaned out. + * Free all CoW staging blocks that are still referenced by the ondisk refcount + * metadata. The ondisk metadata does not track which inode created the + * staging extent, so callers must ensure that there are no cached inodes with + * live CoW staging extents. */ int xfs_reflink_recover_cow( diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index e8d19916ba99d1..5410bf0ab42626 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1742,15 +1742,6 @@ xfs_remount_rw( */ xfs_restore_resvblks(mp); xfs_log_work_queue(mp); - - /* Recover any CoW blocks that never got remapped. */ - error = xfs_reflink_recover_cow(mp); - if (error) { - xfs_err(mp, - "Error %d recovering leftover CoW allocations.", error); - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - return error; - } xfs_blockgc_start(mp); /* Create the per-AG metadata reservation pool .*/ From 88beb994eae175645004266bea6502635741fa99 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 14 Jul 2022 15:23:40 -0700 Subject: [PATCH 0520/1056] xfs: don't include bnobt blocks when reserving free block pool [ Upstream commit c8c568259772751a14e969b7230990508de73d9d ] xfs_reserve_blocks controls the size of the user-visible free space reserve pool. Given the difference between the current and requested pool sizes, it will try to reserve free space from fdblocks. However, the amount requested from fdblocks is also constrained by the amount of space that we think xfs_mod_fdblocks will give us. If we forget to subtract m_allocbt_blks before calling xfs_mod_fdblocks, it will will return ENOSPC and we'll hang the kernel at mount due to the infinite loop. In commit fd43cf600cf6, we decided that xfs_mod_fdblocks should not hand out the "free space" used by the free space btrees, because some portion of the free space btrees hold in reserve space for future btree expansion. Unfortunately, xfs_reserve_blocks' estimation of the number of blocks that it could request from xfs_mod_fdblocks was not updated to include m_allocbt_blks, so if space is extremely low, the caller hangs. Fix this by creating a function to estimate the number of blocks that can be reserved from fdblocks, which needs to exclude the set-aside and m_allocbt_blks. Found by running xfs/306 (which formats a single-AG 20MB filesystem) with an fstests configuration that specifies a 1k blocksize and a specially crafted log size that will consume 7/8 of the space (17920 blocks, specifically) in that AG. Cc: Brian Foster Fixes: fd43cf600cf6 ("xfs: set aside allocation btree blocks from block reservation") Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_fsops.c | 2 +- fs/xfs/xfs_mount.c | 2 +- fs/xfs/xfs_mount.h | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 33e26690a8c4fc..710e857bb825f2 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -434,7 +434,7 @@ xfs_reserve_blocks( error = -ENOSPC; do { free = percpu_counter_sum(&mp->m_fdblocks) - - mp->m_alloc_set_aside; + xfs_fdblocks_unavailable(mp); if (free <= 0) break; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 62f3c153d4b29e..76056de83971ce 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1132,7 +1132,7 @@ xfs_mod_fdblocks( * problems (i.e. transaction abort, pagecache discards, etc.) than * slightly premature -ENOSPC. */ - set_aside = mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks); + set_aside = xfs_fdblocks_unavailable(mp); percpu_counter_add_batch(&mp->m_fdblocks, delta, batch); if (__percpu_counter_compare(&mp->m_fdblocks, set_aside, XFS_FDBLOCKS_BATCH) >= 0) { diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index e091f3b3fa1582..86564295fce6d7 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -478,6 +478,21 @@ extern void xfs_unmountfs(xfs_mount_t *); */ #define XFS_FDBLOCKS_BATCH 1024 +/* + * Estimate the amount of free space that is not available to userspace and is + * not explicitly reserved from the incore fdblocks. This includes: + * + * - The minimum number of blocks needed to support splitting a bmap btree + * - The blocks currently in use by the freespace btrees because they record + * the actual blocks that will fill per-AG metadata space reservations + */ +static inline uint64_t +xfs_fdblocks_unavailable( + struct xfs_mount *mp) +{ + return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks); +} + extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta, bool reserved); extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); From d4dab8b405c6415b5b57741a7c78a066b8076d1f Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jul 2022 15:23:41 -0700 Subject: [PATCH 0521/1056] xfs: run callbacks before waking waiters in xlog_state_shutdown_callbacks [ Upstream commit cd6f79d1fb324968a3bae92f82eeb7d28ca1fd22 ] Brian reported a null pointer dereference failure during unmount in xfs/006. He tracked the problem down to the AIL being torn down before a log shutdown had completed and removed all the items from the AIL. The failure occurred in this path while unmount was proceeding in another task: xfs_trans_ail_delete+0x102/0x130 [xfs] xfs_buf_item_done+0x22/0x30 [xfs] xfs_buf_ioend+0x73/0x4d0 [xfs] xfs_trans_committed_bulk+0x17e/0x2f0 [xfs] xlog_cil_committed+0x2a9/0x300 [xfs] xlog_cil_process_committed+0x69/0x80 [xfs] xlog_state_shutdown_callbacks+0xce/0xf0 [xfs] xlog_force_shutdown+0xdf/0x150 [xfs] xfs_do_force_shutdown+0x5f/0x150 [xfs] xlog_ioend_work+0x71/0x80 [xfs] process_one_work+0x1c5/0x390 worker_thread+0x30/0x350 kthread+0xd7/0x100 ret_from_fork+0x1f/0x30 This is processing an EIO error to a log write, and it's triggering a force shutdown. This causes the log to be shut down, and then it is running attached iclog callbacks from the shutdown context. That means the fs and log has already been marked as xfs_is_shutdown/xlog_is_shutdown and so high level code will abort (e.g. xfs_trans_commit(), xfs_log_force(), etc) with an error because of shutdown. The umount would have been blocked waiting for a log force completion inside xfs_log_cover() -> xfs_sync_sb(). The first thing for this situation to occur is for xfs_sync_sb() to exit without waiting for the iclog buffer to be comitted to disk. The above trace is the completion routine for the iclog buffer, and it is shutting down the filesystem. xlog_state_shutdown_callbacks() does this: { struct xlog_in_core *iclog; LIST_HEAD(cb_list); spin_lock(&log->l_icloglock); iclog = log->l_iclog; do { if (atomic_read(&iclog->ic_refcnt)) { /* Reference holder will re-run iclog callbacks. */ continue; } list_splice_init(&iclog->ic_callbacks, &cb_list); >>>>>> wake_up_all(&iclog->ic_write_wait); >>>>>> wake_up_all(&iclog->ic_force_wait); } while ((iclog = iclog->ic_next) != log->l_iclog); wake_up_all(&log->l_flush_wait); spin_unlock(&log->l_icloglock); >>>>>> xlog_cil_process_committed(&cb_list); } This wakes any thread waiting on IO completion of the iclog (in this case the umount log force) before shutdown processes all the pending callbacks. That means the xfs_sync_sb() waiting on a sync transaction in xfs_log_force() on iclog->ic_force_wait will get woken before the callbacks attached to that iclog are run. This results in xfs_sync_sb() returning an error, and so unmount unblocks and continues to run whilst the log shutdown is still in progress. Normally this is just fine because the force waiter has nothing to do with AIL operations. But in the case of this unmount path, the log force waiter goes on to tear down the AIL because the log is now shut down and so nothing ever blocks it again from the wait point in xfs_log_cover(). Hence it's a race to see who gets to the AIL first - the unmount code or xlog_cil_process_committed() killing the superblock buffer. To fix this, we just have to change the order of processing in xlog_state_shutdown_callbacks() to run the callbacks before it wakes any task waiting on completion of the iclog. Reported-by: Brian Foster Fixes: aad7272a9208 ("xfs: separate out log shutdown callback processing") Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f6cd2d4aa770da..9ac4fc177d9320 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -487,7 +487,10 @@ xfs_log_reserve( * Run all the pending iclog callbacks and wake log force waiters and iclog * space waiters so they can process the newly set shutdown state. We really * don't care what order we process callbacks here because the log is shut down - * and so state cannot change on disk anymore. + * and so state cannot change on disk anymore. However, we cannot wake waiters + * until the callbacks have been processed because we may be in unmount and + * we must ensure that all AIL operations the callbacks perform have completed + * before we tear down the AIL. * * We avoid processing actively referenced iclogs so that we don't run callbacks * while the iclog owner might still be preparing the iclog for IO submssion. @@ -501,7 +504,6 @@ xlog_state_shutdown_callbacks( struct xlog_in_core *iclog; LIST_HEAD(cb_list); - spin_lock(&log->l_icloglock); iclog = log->l_iclog; do { if (atomic_read(&iclog->ic_refcnt)) { @@ -509,14 +511,16 @@ xlog_state_shutdown_callbacks( continue; } list_splice_init(&iclog->ic_callbacks, &cb_list); + spin_unlock(&log->l_icloglock); + + xlog_cil_process_committed(&cb_list); + + spin_lock(&log->l_icloglock); wake_up_all(&iclog->ic_write_wait); wake_up_all(&iclog->ic_force_wait); } while ((iclog = iclog->ic_next) != log->l_iclog); wake_up_all(&log->l_flush_wait); - spin_unlock(&log->l_icloglock); - - xlog_cil_process_committed(&cb_list); } /* @@ -583,11 +587,8 @@ xlog_state_release_iclog( * pending iclog callbacks that were waiting on the release of * this iclog. */ - if (last_ref) { - spin_unlock(&log->l_icloglock); + if (last_ref) xlog_state_shutdown_callbacks(log); - spin_lock(&log->l_icloglock); - } return -EIO; } @@ -3904,7 +3905,10 @@ xlog_force_shutdown( wake_up_all(&log->l_cilp->xc_start_wait); wake_up_all(&log->l_cilp->xc_commit_wait); spin_unlock(&log->l_cilp->xc_push_lock); + + spin_lock(&log->l_icloglock); xlog_state_shutdown_callbacks(log); + spin_unlock(&log->l_icloglock); return log_error; } From d8124f111b67c2ba865649d2b3cbbff9226870b9 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jul 2022 15:23:42 -0700 Subject: [PATCH 0522/1056] xfs: drop async cache flushes from CIL commits. [ Upstream commit 919edbadebe17a67193533f531c2920c03e40fa4 ] Jan Kara reported a performance regression in dbench that he bisected down to commit bad77c375e8d ("xfs: CIL checkpoint flushes caches unconditionally"). Whilst developing the journal flush/fua optimisations this cache was part of, it appeared to made a significant difference to performance. However, now that this patchset has settled and all the correctness issues fixed, there does not appear to be any significant performance benefit to asynchronous cache flushes. In fact, the opposite is true on some storage types and workloads, where additional cache flushes that can occur from fsync heavy workloads have measurable and significant impact on overall throughput. Local dbench testing shows little difference on dbench runs with sync vs async cache flushes on either fast or slow SSD storage, and no difference in streaming concurrent async transaction workloads like fs-mark. Fast NVME storage. >From `dbench -t 30`, CIL scale: clients async sync BW Latency BW Latency 1 935.18 0.855 915.64 0.903 8 2404.51 6.873 2341.77 6.511 16 3003.42 6.460 2931.57 6.529 32 3697.23 7.939 3596.28 7.894 128 7237.43 15.495 7217.74 11.588 512 5079.24 90.587 5167.08 95.822 fsmark, 32 threads, create w/ 64 byte xattr w/32k logbsize create chown unlink async 1m41s 1m16s 2m03s sync 1m40s 1m19s 1m54s Slower SATA SSD storage: >From `dbench -t 30`, CIL scale: clients async sync BW Latency BW Latency 1 78.59 15.792 83.78 10.729 8 367.88 92.067 404.63 59.943 16 564.51 72.524 602.71 76.089 32 831.66 105.984 870.26 110.482 128 1659.76 102.969 1624.73 91.356 512 2135.91 223.054 2603.07 161.160 fsmark, 16 threads, create w/32k logbsize create unlink async 5m06s 4m15s sync 5m00s 4m22s And on Jan's test machine: 5.18-rc8-vanilla 5.18-rc8-patched Amean 1 71.22 ( 0.00%) 64.94 * 8.81%* Amean 2 93.03 ( 0.00%) 84.80 * 8.85%* Amean 4 150.54 ( 0.00%) 137.51 * 8.66%* Amean 8 252.53 ( 0.00%) 242.24 * 4.08%* Amean 16 454.13 ( 0.00%) 439.08 * 3.31%* Amean 32 835.24 ( 0.00%) 829.74 * 0.66%* Amean 64 1740.59 ( 0.00%) 1686.73 * 3.09%* Performance and cache flush behaviour is restored to pre-regression levels. As such, we can now consider the async cache flush mechanism an unnecessary exercise in premature optimisation and hence we can now remove it and the infrastructure it requires completely. Fixes: bad77c375e8d ("xfs: CIL checkpoint flushes caches unconditionally") Reported-and-tested-by: Jan Kara Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_bio_io.c | 35 ----------------------------------- fs/xfs/xfs_linux.h | 2 -- fs/xfs/xfs_log.c | 36 +++++++++++------------------------- fs/xfs/xfs_log_cil.c | 42 +++++++++++++----------------------------- fs/xfs/xfs_log_priv.h | 3 +-- 5 files changed, 25 insertions(+), 93 deletions(-) diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c index 667e297f59b165..17f36db2f79289 100644 --- a/fs/xfs/xfs_bio_io.c +++ b/fs/xfs/xfs_bio_io.c @@ -9,41 +9,6 @@ static inline unsigned int bio_max_vecs(unsigned int count) return bio_max_segs(howmany(count, PAGE_SIZE)); } -static void -xfs_flush_bdev_async_endio( - struct bio *bio) -{ - complete(bio->bi_private); -} - -/* - * Submit a request for an async cache flush to run. If the request queue does - * not require flush operations, just skip it altogether. If the caller needs - * to wait for the flush completion at a later point in time, they must supply a - * valid completion. This will be signalled when the flush completes. The - * caller never sees the bio that is issued here. - */ -void -xfs_flush_bdev_async( - struct bio *bio, - struct block_device *bdev, - struct completion *done) -{ - struct request_queue *q = bdev->bd_disk->queue; - - if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { - complete(done); - return; - } - - bio_init(bio, NULL, 0); - bio_set_dev(bio, bdev); - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; - bio->bi_private = done; - bio->bi_end_io = xfs_flush_bdev_async_endio; - - submit_bio(bio); -} int xfs_rw_bdev( struct block_device *bdev, diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 09a8fba84ff99b..cb9105d667db44 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -197,8 +197,6 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y) int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, char *data, unsigned int op); -void xfs_flush_bdev_async(struct bio *bio, struct block_device *bdev, - struct completion *done); #define ASSERT_ALWAYS(expr) \ (likely(expr) ? (void)0 : assfail(NULL, #expr, __FILE__, __LINE__)) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 9ac4fc177d9320..0fb7d05ca308de 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -527,12 +527,6 @@ xlog_state_shutdown_callbacks( * Flush iclog to disk if this is the last reference to the given iclog and the * it is in the WANT_SYNC state. * - * If the caller passes in a non-zero @old_tail_lsn and the current log tail - * does not match, there may be metadata on disk that must be persisted before - * this iclog is written. To satisfy that requirement, set the - * XLOG_ICL_NEED_FLUSH flag as a condition for writing this iclog with the new - * log tail value. - * * If XLOG_ICL_NEED_FUA is already set on the iclog, we need to ensure that the * log tail is updated correctly. NEED_FUA indicates that the iclog will be * written to stable storage, and implies that a commit record is contained @@ -549,12 +543,10 @@ xlog_state_shutdown_callbacks( * always capture the tail lsn on the iclog on the first NEED_FUA release * regardless of the number of active reference counts on this iclog. */ - int xlog_state_release_iclog( struct xlog *log, - struct xlog_in_core *iclog, - xfs_lsn_t old_tail_lsn) + struct xlog_in_core *iclog) { xfs_lsn_t tail_lsn; bool last_ref; @@ -565,18 +557,14 @@ xlog_state_release_iclog( /* * Grabbing the current log tail needs to be atomic w.r.t. the writing * of the tail LSN into the iclog so we guarantee that the log tail does - * not move between deciding if a cache flush is required and writing - * the LSN into the iclog below. + * not move between the first time we know that the iclog needs to be + * made stable and when we eventually submit it. */ - if (old_tail_lsn || iclog->ic_state == XLOG_STATE_WANT_SYNC) { + if ((iclog->ic_state == XLOG_STATE_WANT_SYNC || + (iclog->ic_flags & XLOG_ICL_NEED_FUA)) && + !iclog->ic_header.h_tail_lsn) { tail_lsn = xlog_assign_tail_lsn(log->l_mp); - - if (old_tail_lsn && tail_lsn != old_tail_lsn) - iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; - - if ((iclog->ic_flags & XLOG_ICL_NEED_FUA) && - !iclog->ic_header.h_tail_lsn) - iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); } last_ref = atomic_dec_and_test(&iclog->ic_refcnt); @@ -601,8 +589,6 @@ xlog_state_release_iclog( } iclog->ic_state = XLOG_STATE_SYNCING; - if (!iclog->ic_header.h_tail_lsn) - iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); xlog_verify_tail_lsn(log, iclog); trace_xlog_iclog_syncing(iclog, _RET_IP_); @@ -875,7 +861,7 @@ xlog_force_iclog( iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; if (iclog->ic_state == XLOG_STATE_ACTIVE) xlog_state_switch_iclogs(iclog->ic_log, iclog, 0); - return xlog_state_release_iclog(iclog->ic_log, iclog, 0); + return xlog_state_release_iclog(iclog->ic_log, iclog); } /* @@ -2413,7 +2399,7 @@ xlog_write_copy_finish( ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || xlog_is_shutdown(log)); release_iclog: - error = xlog_state_release_iclog(log, iclog, 0); + error = xlog_state_release_iclog(log, iclog); spin_unlock(&log->l_icloglock); return error; } @@ -2630,7 +2616,7 @@ xlog_write( spin_lock(&log->l_icloglock); xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); - error = xlog_state_release_iclog(log, iclog, 0); + error = xlog_state_release_iclog(log, iclog); spin_unlock(&log->l_icloglock); return error; @@ -3054,7 +3040,7 @@ xlog_state_get_iclog_space( * reference to the iclog. */ if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) - error = xlog_state_release_iclog(log, iclog, 0); + error = xlog_state_release_iclog(log, iclog); spin_unlock(&log->l_icloglock); if (error) return error; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index b59cc9c0961ce5..eafe30843ff0fe 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -681,11 +681,21 @@ xlog_cil_set_ctx_write_state( * The LSN we need to pass to the log items on transaction * commit is the LSN reported by the first log vector write, not * the commit lsn. If we use the commit record lsn then we can - * move the tail beyond the grant write head. + * move the grant write head beyond the tail LSN and overwrite + * it. */ ctx->start_lsn = lsn; wake_up_all(&cil->xc_start_wait); spin_unlock(&cil->xc_push_lock); + + /* + * Make sure the metadata we are about to overwrite in the log + * has been flushed to stable storage before this iclog is + * issued. + */ + spin_lock(&cil->xc_log->l_icloglock); + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; + spin_unlock(&cil->xc_log->l_icloglock); return; } @@ -864,10 +874,7 @@ xlog_cil_push_work( struct xfs_trans_header thdr; struct xfs_log_iovec lhdr; struct xfs_log_vec lvhdr = { NULL }; - xfs_lsn_t preflush_tail_lsn; xfs_csn_t push_seq; - struct bio bio; - DECLARE_COMPLETION_ONSTACK(bdev_flush); bool push_commit_stable; new_ctx = xlog_cil_ctx_alloc(); @@ -937,23 +944,6 @@ xlog_cil_push_work( list_add(&ctx->committing, &cil->xc_committing); spin_unlock(&cil->xc_push_lock); - /* - * The CIL is stable at this point - nothing new will be added to it - * because we hold the flush lock exclusively. Hence we can now issue - * a cache flush to ensure all the completed metadata in the journal we - * are about to overwrite is on stable storage. - * - * Because we are issuing this cache flush before we've written the - * tail lsn to the iclog, we can have metadata IO completions move the - * tail forwards between the completion of this flush and the iclog - * being written. In this case, we need to re-issue the cache flush - * before the iclog write. To detect whether the log tail moves, sample - * the tail LSN *before* we issue the flush. - */ - preflush_tail_lsn = atomic64_read(&log->l_tail_lsn); - xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev, - &bdev_flush); - /* * Pull all the log vectors off the items in the CIL, and remove the * items from the CIL. We don't need the CIL lock here because it's only @@ -1030,12 +1020,6 @@ xlog_cil_push_work( lvhdr.lv_iovecp = &lhdr; lvhdr.lv_next = ctx->lv_chain; - /* - * Before we format and submit the first iclog, we have to ensure that - * the metadata writeback ordering cache flush is complete. - */ - wait_for_completion(&bdev_flush); - error = xlog_cil_write_chain(ctx, &lvhdr); if (error) goto out_abort_free_ticket; @@ -1094,7 +1078,7 @@ xlog_cil_push_work( if (push_commit_stable && ctx->commit_iclog->ic_state == XLOG_STATE_ACTIVE) xlog_state_switch_iclogs(log, ctx->commit_iclog, 0); - xlog_state_release_iclog(log, ctx->commit_iclog, preflush_tail_lsn); + xlog_state_release_iclog(log, ctx->commit_iclog); /* Not safe to reference ctx now! */ @@ -1115,7 +1099,7 @@ xlog_cil_push_work( return; } spin_lock(&log->l_icloglock); - xlog_state_release_iclog(log, ctx->commit_iclog, 0); + xlog_state_release_iclog(log, ctx->commit_iclog); /* Not safe to reference ctx now! */ spin_unlock(&log->l_icloglock); } diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 844fbeec3545ab..f3d68ca39f45c9 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -524,8 +524,7 @@ void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); void xlog_state_switch_iclogs(struct xlog *log, struct xlog_in_core *iclog, int eventual_size); -int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog, - xfs_lsn_t log_tail_lsn); +int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog); /* * When we crack an atomic LSN, we sample it first so that the value will not From 3bbe6437c1d51edb3f5689fecbd0ff4cc8f8fe3d Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 24 Jun 2022 17:18:45 +0300 Subject: [PATCH 0523/1056] reset: Fix devm bulk optional exclusive control getter [ Upstream commit a57f68ddc8865d59a19783080cc52fb4a11dc209 ] Most likely due to copy-paste mistake the device managed version of the denoted reset control getter has been implemented with invalid semantic, which can be immediately spotted by having "WARN_ON(shared && acquired)" warning in the system log as soon as the method is called. Anyway let's fix it by altering the boolean arguments passed to the __devm_reset_control_bulk_get() method from - shared = true, optional = false, acquired = true to + shared = false, optional = true, acquired = true That's what they were supposed to be in the first place (see the non-devm version of the same method: reset_control_bulk_get_optional_exclusive()). Fixes: 48d71395896d ("reset: Add reset_control_bulk API") Signed-off-by: Serge Semin Reviewed-by: Dmitry Osipenko Signed-off-by: Philipp Zabel Link: https://lore.kernel.org/r/20220624141853.7417-2-Sergey.Semin@baikalelectronics.ru Signed-off-by: Sasha Levin --- include/linux/reset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/reset.h b/include/linux/reset.h index db0e6115a2f6ad..7bb58373752892 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -711,7 +711,7 @@ static inline int __must_check devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, true); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true); } /** From 28ad09b0d7b1eabf31e0eea4c5faa23d538fbac5 Mon Sep 17 00:00:00 2001 From: Kris Bahnsen Date: Thu, 30 Jun 2022 14:03:27 -0700 Subject: [PATCH 0524/1056] ARM: dts: imx6qdl-ts7970: Fix ngpio typo and count [ Upstream commit e95ea0f687e679fcb0a3a67d0755b81ee7d60db0 ] Device-tree incorrectly used "ngpio" which caused the driver to fallback to 32 ngpios. This platform has 62 GPIO registers. Fixes: 9ff8e9fccef9 ("ARM: dts: TS-7970: add basic device tree") Signed-off-by: Kris Bahnsen Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6qdl-ts7970.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi index fded07f370b390..d6ba4b2a60f6f5 100644 --- a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi +++ b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi @@ -226,7 +226,7 @@ reg = <0x28>; #gpio-cells = <2>; gpio-controller; - ngpio = <32>; + ngpios = <62>; }; sgtl5000: codec@a { From 2bcb2e42a500e5349f7e783bbda7d3b88d820281 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Wed, 6 Jul 2022 13:06:22 +0300 Subject: [PATCH 0525/1056] spi: amd: Limit max transfer and message size [ Upstream commit 6ece49c56965544262523dae4a071ace3db63507 ] Enabling the SPI CS35L41 audio codec driver for Steam Deck [1] revealed a problem with the current AMD SPI controller driver implementation, consisting of an unrecoverable system hang. The issue can be prevented if we ensure the max transfer size and the max message size do not exceed the FIFO buffer size. According to the implementation of the downstream driver, the AMD SPI controller is not able to handle more than 70 bytes per transfer, which corresponds to the size of the FIFO buffer. Hence, let's fix this by setting the SPI limits mentioned above. [1] https://lore.kernel.org/r/20220621213819.262537-1-cristian.ciocaltea@collabora.com Reported-by: Anastasios Vacharakis Fixes: bbb336f39efc ("spi: spi-amd: Add AMD SPI controller driver support") Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/20220706100626.1234731-2-cristian.ciocaltea@collabora.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-amd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c index 3cf76096a76d85..39dbe9903da2cc 100644 --- a/drivers/spi/spi-amd.c +++ b/drivers/spi/spi-amd.c @@ -28,6 +28,7 @@ #define AMD_SPI_RX_COUNT_REG 0x4B #define AMD_SPI_STATUS_REG 0x4C +#define AMD_SPI_FIFO_SIZE 70 #define AMD_SPI_MEM_SIZE 200 /* M_CMD OP codes for SPI */ @@ -245,6 +246,11 @@ static int amd_spi_master_transfer(struct spi_master *master, return 0; } +static size_t amd_spi_max_transfer_size(struct spi_device *spi) +{ + return AMD_SPI_FIFO_SIZE; +} + static int amd_spi_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -275,6 +281,8 @@ static int amd_spi_probe(struct platform_device *pdev) master->flags = SPI_MASTER_HALF_DUPLEX; master->setup = amd_spi_master_setup; master->transfer_one_message = amd_spi_master_transfer; + master->max_transfer_size = amd_spi_max_transfer_size; + master->max_message_size = amd_spi_max_transfer_size; /* Register the controller with SPI framework */ err = devm_spi_register_master(dev, master); From 443838e6ff0f605d791c59d0c0e46b860d793622 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 31 May 2022 09:53:42 +0100 Subject: [PATCH 0526/1056] ARM: 9209/1: Spectre-BHB: avoid pr_info() every time a CPU comes out of idle [ Upstream commit 0609e200246bfd3b7516091c491bec4308349055 ] Jon reports that the Spectre-BHB init code is filling up the kernel log with spurious notifications about which mitigation has been enabled, every time any CPU comes out of a low power state. Given that Spectre-BHB mitigations are system wide, only a single mitigation can be enabled, and we already print an error if two types of CPUs coexist in a single system that require different Spectre-BHB mitigations. This means that the pr_info() that describes the selected mitigation does not need to be emitted for each CPU anyway, and so we can simply emit it only once. In order to clarify the above in the log message, update it to describe that the selected mitigation will be enabled on all CPUs, including ones that are unaffected. If another CPU comes up later that is affected and requires a different mitigation, we report an error as before. Fixes: b9baf5c8c5c3 ("ARM: Spectre-BHB workaround") Tested-by: Jon Hunter Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/mm/proc-v7-bugs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index f9730eba063285..8bc7a2d6d6c7f9 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -208,10 +208,10 @@ static int spectre_bhb_install_workaround(int method) return SPECTRE_VULNERABLE; spectre_bhb_method = method; - } - pr_info("CPU%u: Spectre BHB: using %s workaround\n", - smp_processor_id(), spectre_bhb_method_name(method)); + pr_info("CPU%u: Spectre BHB: enabling %s workaround for all CPUs\n", + smp_processor_id(), spectre_bhb_method_name(method)); + } return SPECTRE_MITIGATED; } From ba27a912f99efe7d803be16c7617439dd15fc6b7 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 13 Jun 2022 15:05:41 +0100 Subject: [PATCH 0527/1056] ARM: 9210/1: Mark the FDT_FIXED sections as shareable [ Upstream commit 598f0a99fa8a35be44b27106b43ddc66417af3b1 ] commit 7a1be318f579 ("ARM: 9012/1: move device tree mapping out of linear region") use FDT_FIXED_BASE to map the whole FDT_FIXED_SIZE memory area which contains fdt. But it only reserves the exact physical memory that fdt occupied. Unfortunately, this mapping is non-shareable. An illegal or speculative read access can bring the RAM content from non-fdt zone into cache, PIPT makes it to be hit by subsequently read access through shareable mapping(such as linear mapping), and the cache consistency between cores is lost due to non-shareable property. |<---------FDT_FIXED_SIZE------>| | | ------------------------------- | | | | ------------------------------- 1. CoreA read through MT_ROM mapping, the old data is loaded into the cache. 2. CoreB write to update data through linear mapping. CoreA received the notification to invalid the corresponding cachelines, but the property non-shareable makes it to be ignored. 3. CoreA read through linear mapping, cache hit, the old data is read. To eliminate this risk, add a new memory type MT_MEMORY_RO. Compared to MT_ROM, it is shareable and non-executable. Here's an example: list_del corruption. prev->next should be c0ecbf74, but was c08410dc kernel BUG at lib/list_debug.c:53! ... ... PC is at __list_del_entry_valid+0x58/0x98 LR is at __list_del_entry_valid+0x58/0x98 psr: 60000093 sp : c0ecbf30 ip : 00000000 fp : 00000001 r10: c08410d0 r9 : 00000001 r8 : c0825e0c r7 : 20000013 r6 : c08410d0 r5 : c0ecbf74 r4 : c0ecbf74 r3 : c0825d08 r2 : 00000000 r1 : df7ce6f4 r0 : 00000044 ... ... Stack: (0xc0ecbf30 to 0xc0ecc000) bf20: c0ecbf74 c0164fd0 c0ecbf70 c0165170 bf40: c0eca000 c0840c00 c0840c00 c0824500 c0825e0c c0189bbc c088f404 60000013 bf60: 60000013 c0e85100 000004ec 00000000 c0ebcdc0 c0ecbf74 c0ecbf74 c0825d08 ... ... < next prev > (__list_del_entry_valid) from (__list_del_entry+0xc/0x20) (__list_del_entry) from (finish_swait+0x60/0x7c) (finish_swait) from (rcu_gp_kthread+0x560/0xa20) (rcu_gp_kthread) from (kthread+0x14c/0x15c) (kthread) from (ret_from_fork+0x14/0x24) The faulty list node to be deleted is a local variable, its address is c0ecbf74. The dumped stack shows that 'prev' = c0ecbf74, but its value before lib/list_debug.c:53 is c08410dc. A large amount of printing results in swapping out the cacheline containing the old data(MT_ROM mapping is read only, so the cacheline cannot be dirty), and the subsequent dump operation obtains new data from the DDR. Fixes: 7a1be318f579 ("ARM: 9012/1: move device tree mapping out of linear region") Suggested-by: Ard Biesheuvel Signed-off-by: Zhen Lei Reviewed-by: Ard Biesheuvel Reviewed-by: Kefeng Wang Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/include/asm/mach/map.h | 1 + arch/arm/mm/mmu.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h index 92282558caf7cd..2b8970d8e5a2ff 100644 --- a/arch/arm/include/asm/mach/map.h +++ b/arch/arm/include/asm/mach/map.h @@ -27,6 +27,7 @@ enum { MT_HIGH_VECTORS, MT_MEMORY_RWX, MT_MEMORY_RW, + MT_MEMORY_RO, MT_ROM, MT_MEMORY_RWX_NONCACHED, MT_MEMORY_RW_DTCM, diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 5e2be37a198e29..cd17e324aa51ea 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -296,6 +296,13 @@ static struct mem_type mem_types[] __ro_after_init = { .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, + [MT_MEMORY_RO] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN | L_PTE_RDONLY, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT, + .domain = DOMAIN_KERNEL, + }, [MT_ROM] = { .prot_sect = PMD_TYPE_SECT, .domain = DOMAIN_KERNEL, @@ -489,6 +496,7 @@ static void __init build_mem_type_table(void) /* Also setup NX memory mapping */ mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN; } if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { /* @@ -568,6 +576,7 @@ static void __init build_mem_type_table(void) mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; #endif /* @@ -587,6 +596,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED; @@ -647,6 +658,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd; mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RO].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RO].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask; mem_types[MT_ROM].prot_sect |= cp->pmd; @@ -1360,7 +1373,7 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK); map.virtual = FDT_FIXED_BASE; map.length = FDT_FIXED_SIZE; - map.type = MT_ROM; + map.type = MT_MEMORY_RO; create_mapping(&map); } From 2ee2ef846fa35ccb37baa321f5db1206230df8fb Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 6 Jun 2022 21:20:29 +0300 Subject: [PATCH 0528/1056] net/mlx5e: kTLS, Fix build time constant test in TX [ Upstream commit 6cc2714e85754a621219693ea8aa3077d6fca0cb ] Use the correct constant (TLS_DRIVER_STATE_SIZE_TX) in the comparison against the size of the private TX TLS driver context. Fixes: df8d866770f9 ("net/mlx5e: kTLS, Use kernel API to extract private offload context") Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 9ad3459fb63a61..dadb71081ed06b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -68,8 +68,7 @@ mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx, struct mlx5e_ktls_offload_context_tx **ctx = __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX); - BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_tx *) > - TLS_OFFLOAD_CONTEXT_SIZE_TX); + BUILD_BUG_ON(sizeof(priv_tx) > TLS_DRIVER_STATE_SIZE_TX); *ctx = priv_tx; } From 5adcc5ded58ad6333f101030c5882178dbeb41f2 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 6 Jun 2022 21:21:10 +0300 Subject: [PATCH 0529/1056] net/mlx5e: kTLS, Fix build time constant test in RX [ Upstream commit 2ec6cf9b742a5c18982861322fa5de6510f8f57e ] Use the correct constant (TLS_DRIVER_STATE_SIZE_RX) in the comparison against the size of the private RX TLS driver context. Fixes: 1182f3659357 ("net/mlx5e: kTLS, Add kTLS RX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 15711814d2d28d..d92b97c56f4cd7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -231,8 +231,7 @@ mlx5e_set_ktls_rx_priv_ctx(struct tls_context *tls_ctx, struct mlx5e_ktls_offload_context_rx **ctx = __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX); - BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_rx *) > - TLS_OFFLOAD_CONTEXT_SIZE_RX); + BUILD_BUG_ON(sizeof(priv_rx) > TLS_DRIVER_STATE_SIZE_RX); *ctx = priv_rx; } From 3a5e734ec002905dabb6fb8fff6b5b65be0472f0 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 22 Jun 2022 13:11:18 +0300 Subject: [PATCH 0530/1056] net/mlx5e: Fix enabling sriov while tc nic rules are offloaded [ Upstream commit 0c9d876545a56aebed30fa306d0460a4d28d271a ] There is a total of four 4M entries flow tables. In sriov disabled mode, ct, ct_nat and post_act take three of them. When adding the first tc nic rule in this mode, it will take another 4M table for the tc table. If user then enables sriov, the legacy flow table tries to take another 4M and fails, and so enablement fails. To fix that, have legacy fdb take the next available maximum size from the fs ft pool. Fixes: 4a98544d1827 ("net/mlx5: Move chains ft pool to be used by all firmware steering") Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index 0c4c743ca31e17..3a2575dc5355d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -11,6 +11,7 @@ #include "mlx5_core.h" #include "eswitch.h" #include "fs_core.h" +#include "fs_ft_pool.h" #include "esw/qos.h" enum { @@ -95,8 +96,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) if (!flow_group_in) return -ENOMEM; - table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - ft_attr.max_fte = table_size; + ft_attr.max_fte = POOL_NEXT_SIZE; ft_attr.prio = LEGACY_FDB_PRIO; fdb = mlx5_create_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { @@ -105,6 +105,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) goto out; } esw->fdb_table.legacy.fdb = fdb; + table_size = fdb->max_fte; /* Addresses group : Full match unicast/multicast addresses */ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, From 663a06e9620329b638a1711b5fdb579417ccafc8 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 27 Jun 2022 15:05:53 +0300 Subject: [PATCH 0531/1056] net/mlx5e: Fix capability check for updating vnic env counters [ Upstream commit 452133dd580811f184e76b1402983182ee425298 ] The existing capability check for vnic env counters only checks for receive steering discards, although we need the counters update for the exposed internal queue oob counter as well. This could result in the latter counter not being updated correctly when the receive steering discards counter is not supported. Fix that by checking whether any counter is supported instead of only the steering counter capability. Fixes: 0cfafd4b4ddf ("net/mlx5e: Add device out of buffer counter") Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index e1dd17019030e6..5a5c6eda29d286 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -614,7 +614,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env) u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; struct mlx5_core_dev *mdev = priv->mdev; - if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) + if (!mlx5e_stats_grp_vnic_env_num_stats(priv)) return; MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); From c6e1c5c0c19da31ae3f6581b7dccf5668d75c156 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 30 May 2022 14:01:37 +0300 Subject: [PATCH 0532/1056] net/mlx5e: Ring the TX doorbell on DMA errors [ Upstream commit 5b759bf2f9d73db05369aef2344502095c4e5e73 ] TX doorbells may be postponed, because sometimes the driver knows that another packet follows (for example, when xmit_more is true, or when a MPWQE session is closed before transmitting a packet). However, the DMA mapping may fail for the next packet, in which case a new WQE is not posted, the doorbell isn't updated either, and the transmission of the previous packet will be delayed indefinitely. This commit fixes the described rare error flow by posting a NOP and ringing the doorbell on errors to flush all the previous packets. The MPWQE session is closed before that. DMA mapping in the MPWQE flow is moved to the beginning of mlx5e_sq_xmit_mpwqe, because empty sessions are not allowed. Stop room always has enough space for a NOP, because the actual TX WQE is not posted. Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en_tx.c | 39 ++++++++++++++----- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 7fd33b356cc8d1..1544d4c2c63604 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -429,6 +429,26 @@ static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq) } } +static void mlx5e_tx_flush(struct mlx5e_txqsq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + /* Must not be called when a MPWQE session is active but empty. */ + mlx5e_tx_mpwqe_ensure_complete(sq); + + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + wi = &sq->db.wqe_info[pi]; + + *wi = (struct mlx5e_tx_wqe_info) { + .num_wqebbs = 1, + }; + + wqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); +} + static inline void mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, const struct mlx5e_tx_attr *attr, @@ -521,6 +541,7 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr) @@ -622,6 +643,13 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_ctrl_seg *cseg; struct mlx5e_xmit_data txd; + txd.data = skb->data; + txd.len = skb->len; + + txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr))) + goto err_unmap; + if (!mlx5e_tx_mpwqe_session_is_active(sq)) { mlx5e_tx_mpwqe_session_start(sq, eseg); } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) { @@ -631,18 +659,9 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, sq->stats->xmit_more += xmit_more; - txd.data = skb->data; - txd.len = skb->len; - - txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr))) - goto err_unmap; mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE); - mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb); - mlx5e_tx_mpwqe_add_dseg(sq, &txd); - mlx5e_tx_skb_update_hwts_flags(skb); if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) { @@ -664,6 +683,7 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, mlx5e_dma_unmap_wqe_err(sq, 1); sq->stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) @@ -1033,5 +1053,6 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } #endif From 505114dda5bbfd07f4ce9a2df5b7d8ef5f2a1218 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Fri, 24 Jun 2022 06:04:06 -0700 Subject: [PATCH 0533/1056] drm/i915: fix a possible refcount leak in intel_dp_add_mst_connector() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 85144df9ff4652816448369de76897c57cbb1b93 ] If drm_connector_init fails, intel_connector_free will be called to take care of proper free. So it is necessary to drop the refcount of port before intel_connector_free. Fixes: 091a4f91942a ("drm/i915: Handle drm-layer errors in intel_dp_add_mst_connector") Signed-off-by: Hangyu Hua Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220624130406.17996-1-jose.souza@intel.com Signed-off-by: José Roberto de Souza (cherry picked from commit cea9ed611e85d36a05db52b6457bf584b7d969e2) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 8d13d7b26a25b2..2a20487effccdb 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -817,6 +817,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, DRM_MODE_CONNECTOR_DisplayPort); if (ret) { + drm_dp_mst_put_port_malloc(port); intel_connector_free(intel_connector); return NULL; } From c8d5d81940938b5f6c0f495ca9538e7740416f30 Mon Sep 17 00:00:00 2001 From: Huaxin Lu Date: Tue, 5 Jul 2022 13:14:17 +0800 Subject: [PATCH 0534/1056] ima: Fix a potential integer overflow in ima_appraise_measurement [ Upstream commit d2ee2cfc4aa85ff6a2a3b198a3a524ec54e3d999 ] When the ima-modsig is enabled, the rc passed to evm_verifyxattr() may be negative, which may cause the integer overflow problem. Fixes: 39b07096364a ("ima: Implement support for module-style appended signatures") Signed-off-by: Huaxin Lu Signed-off-by: Mimi Zohar Signed-off-by: Sasha Levin --- security/integrity/ima/ima_appraise.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index dbba51583e7c13..ed04bb7c751200 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -408,7 +408,8 @@ int ima_appraise_measurement(enum ima_hooks func, goto out; } - status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, rc, iint); + status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, + rc < 0 ? 0 : rc, iint); switch (status) { case INTEGRITY_PASS: case INTEGRITY_PASS_IMMUTABLE: From 9b9773cc55d8f35be574c6f7d912a3d28522be07 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Fri, 24 Jun 2022 12:13:01 +0200 Subject: [PATCH 0535/1056] ASoC: sgtl5000: Fix noise on shutdown/remove [ Upstream commit 040e3360af3736348112d29425bf5d0be5b93115 ] Put the SGTL5000 in a silent/safe state on shutdown/remove, this is required since the SGTL5000 produces a constant noise on its output after it is configured and its clock is removed. Without this change this is happening every time the module is unbound/removed or from reboot till the clock is enabled again. The issue was experienced on both a Toradex Colibri/Apalis iMX6, but can be easily reproduced everywhere just playing something on the codec and after that removing/unbinding the driver. Fixes: 9b34e6cc3bc2 ("ASoC: Add Freescale SGTL5000 codec support") Signed-off-by: Francesco Dolcini Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/20220624101301.441314-1-francesco.dolcini@toradex.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/sgtl5000.c | 9 +++++++++ sound/soc/codecs/sgtl5000.h | 1 + 2 files changed, 10 insertions(+) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 97bf1f222805e0..dc56e6c6b66853 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1797,6 +1797,9 @@ static int sgtl5000_i2c_remove(struct i2c_client *client) { struct sgtl5000_priv *sgtl5000 = i2c_get_clientdata(client); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_DIG_POWER, SGTL5000_DIG_POWER_DEFAULT); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, SGTL5000_ANA_POWER_DEFAULT); + clk_disable_unprepare(sgtl5000->mclk); regulator_bulk_disable(sgtl5000->num_supplies, sgtl5000->supplies); regulator_bulk_free(sgtl5000->num_supplies, sgtl5000->supplies); @@ -1804,6 +1807,11 @@ static int sgtl5000_i2c_remove(struct i2c_client *client) return 0; } +static void sgtl5000_i2c_shutdown(struct i2c_client *client) +{ + sgtl5000_i2c_remove(client); +} + static const struct i2c_device_id sgtl5000_id[] = { {"sgtl5000", 0}, {}, @@ -1824,6 +1832,7 @@ static struct i2c_driver sgtl5000_i2c_driver = { }, .probe = sgtl5000_i2c_probe, .remove = sgtl5000_i2c_remove, + .shutdown = sgtl5000_i2c_shutdown, .id_table = sgtl5000_id, }; diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h index 56ec5863f25071..3a808c762299e5 100644 --- a/sound/soc/codecs/sgtl5000.h +++ b/sound/soc/codecs/sgtl5000.h @@ -80,6 +80,7 @@ /* * SGTL5000_CHIP_DIG_POWER */ +#define SGTL5000_DIG_POWER_DEFAULT 0x0000 #define SGTL5000_ADC_EN 0x0040 #define SGTL5000_DAC_EN 0x0020 #define SGTL5000_DAP_POWERUP 0x0010 From 7dc0ae04c04ef2562188d83c32a2c5c231751d33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Thu, 30 Jun 2022 09:51:32 +0200 Subject: [PATCH 0536/1056] ASoC: tas2764: Add post reset delays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cd10bb89b0d57bca98eb75e0444854a1c129a14e ] Make sure there is at least 1 ms delay from reset to first command as is specified in the datasheet. This is a fix similar to commit 307f31452078 ("ASoC: tas2770: Insert post reset delay"). Fixes: 827ed8a0fa50 ("ASoC: tas2764: Add the driver for the TAS2764") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20220630075135.2221-1-povik+lin@cutebit.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 9265af41c235db..edc66ff6dc49f5 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -42,10 +42,12 @@ static void tas2764_reset(struct tas2764_priv *tas2764) gpiod_set_value_cansleep(tas2764->reset_gpio, 0); msleep(20); gpiod_set_value_cansleep(tas2764->reset_gpio, 1); + usleep_range(1000, 2000); } snd_soc_component_write(tas2764->component, TAS2764_SW_RST, TAS2764_RST); + usleep_range(1000, 2000); } static int tas2764_set_bias_level(struct snd_soc_component *component, @@ -107,8 +109,10 @@ static int tas2764_codec_resume(struct snd_soc_component *component) struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component); int ret; - if (tas2764->sdz_gpio) + if (tas2764->sdz_gpio) { gpiod_set_value_cansleep(tas2764->sdz_gpio, 1); + usleep_range(1000, 2000); + } ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL, TAS2764_PWR_CTRL_MASK, @@ -501,8 +505,10 @@ static int tas2764_codec_probe(struct snd_soc_component *component) tas2764->component = component; - if (tas2764->sdz_gpio) + if (tas2764->sdz_gpio) { gpiod_set_value_cansleep(tas2764->sdz_gpio, 1); + usleep_range(1000, 2000); + } tas2764_reset(tas2764); From 1230d3e4b8846ea878d097718a3ad3ac219f317d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Thu, 30 Jun 2022 09:51:33 +0200 Subject: [PATCH 0537/1056] ASoC: tas2764: Fix and extend FSYNC polarity handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d1a10f1b48202e2d183cce144c218a211e98d906 ] Fix setting of FSYNC polarity in case of LEFT_J and DSP_A/B formats. Do NOT set the SCFG field as was previously done, because that is not correct and is also in conflict with the "ASI1 Source" control which sets the same SCFG field! Also add support for explicit polarity inversion. Fixes: 827ed8a0fa50 ("ASoC: tas2764: Add the driver for the TAS2764") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20220630075135.2221-2-povik+lin@cutebit.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.c | 30 +++++++++++++++++------------- sound/soc/codecs/tas2764.h | 6 ++---- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index edc66ff6dc49f5..46c815650b2c00 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -135,7 +135,8 @@ static const char * const tas2764_ASI1_src[] = { }; static SOC_ENUM_SINGLE_DECL( - tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, 4, tas2764_ASI1_src); + tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, TAS2764_TDM_CFG2_SCFG_SHIFT, + tas2764_ASI1_src); static const struct snd_kcontrol_new tas2764_asi1_mux = SOC_DAPM_ENUM("ASI1 Source", tas2764_ASI1_src_enum); @@ -333,20 +334,22 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) { struct snd_soc_component *component = dai->component; struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component); - u8 tdm_rx_start_slot = 0, asi_cfg_1 = 0; - int iface; + u8 tdm_rx_start_slot = 0, asi_cfg_0 = 0, asi_cfg_1 = 0; int ret; switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_IF: + asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; + fallthrough; case SND_SOC_DAIFMT_NB_NF: asi_cfg_1 = TAS2764_TDM_CFG1_RX_RISING; break; + case SND_SOC_DAIFMT_IB_IF: + asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; + fallthrough; case SND_SOC_DAIFMT_IB_NF: asi_cfg_1 = TAS2764_TDM_CFG1_RX_FALLING; break; - default: - dev_err(tas2764->dev, "ASI format Inverse is not found\n"); - return -EINVAL; } ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1, @@ -357,13 +360,13 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: + asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; + fallthrough; case SND_SOC_DAIFMT_DSP_A: - iface = TAS2764_TDM_CFG2_SCFG_I2S; tdm_rx_start_slot = 1; break; case SND_SOC_DAIFMT_DSP_B: case SND_SOC_DAIFMT_LEFT_J: - iface = TAS2764_TDM_CFG2_SCFG_LEFT_J; tdm_rx_start_slot = 0; break; default: @@ -372,14 +375,15 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } - ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1, - TAS2764_TDM_CFG1_MASK, - (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT)); + ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG0, + TAS2764_TDM_CFG0_FRAME_START, + asi_cfg_0); if (ret < 0) return ret; - ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG2, - TAS2764_TDM_CFG2_SCFG_MASK, iface); + ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1, + TAS2764_TDM_CFG1_MASK, + (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT)); if (ret < 0) return ret; diff --git a/sound/soc/codecs/tas2764.h b/sound/soc/codecs/tas2764.h index 67d6fd903c42cc..f015f22a083b56 100644 --- a/sound/soc/codecs/tas2764.h +++ b/sound/soc/codecs/tas2764.h @@ -47,6 +47,7 @@ #define TAS2764_TDM_CFG0_MASK GENMASK(3, 1) #define TAS2764_TDM_CFG0_44_1_48KHZ BIT(3) #define TAS2764_TDM_CFG0_88_2_96KHZ (BIT(3) | BIT(1)) +#define TAS2764_TDM_CFG0_FRAME_START BIT(0) /* TDM Configuration Reg1 */ #define TAS2764_TDM_CFG1 TAS2764_REG(0X0, 0x09) @@ -66,10 +67,7 @@ #define TAS2764_TDM_CFG2_RXS_16BITS 0x0 #define TAS2764_TDM_CFG2_RXS_24BITS BIT(0) #define TAS2764_TDM_CFG2_RXS_32BITS BIT(1) -#define TAS2764_TDM_CFG2_SCFG_MASK GENMASK(5, 4) -#define TAS2764_TDM_CFG2_SCFG_I2S 0x0 -#define TAS2764_TDM_CFG2_SCFG_LEFT_J BIT(4) -#define TAS2764_TDM_CFG2_SCFG_RIGHT_J BIT(5) +#define TAS2764_TDM_CFG2_SCFG_SHIFT 4 /* TDM Configuration Reg3 */ #define TAS2764_TDM_CFG3 TAS2764_REG(0X0, 0x0c) From a92e7564c540a25c094ba7e9d4e227fde7810a4d Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Thu, 30 Jun 2022 09:51:34 +0200 Subject: [PATCH 0538/1056] ASoC: tas2764: Correct playback volume range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3e99e5697e1f7120b5abc755e8a560b22612d6ed ] DVC value 0xc8 is -100dB and 0xc9 is mute; this needs to map to -100.5dB as far as the dB scale is concerned. Fix that and enable the mute flag, so alsamixer correctly shows the control as <0 dB .. -100 dB, mute>. Signed-off-by: Hector Martin Fixes: 827ed8a0fa50 ("ASoC: tas2764: Add the driver for the TAS2764") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20220630075135.2221-3-povik+lin@cutebit.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 46c815650b2c00..bd79bc7ecf6bb6 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -536,7 +536,7 @@ static int tas2764_codec_probe(struct snd_soc_component *component) } static DECLARE_TLV_DB_SCALE(tas2764_digital_tlv, 1100, 50, 0); -static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10000, 50, 0); +static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10050, 50, 1); static const struct snd_kcontrol_new tas2764_snd_controls[] = { SOC_SINGLE_TLV("Speaker Volume", TAS2764_DVC, 0, From dfe3ce23217c401c41346795736805205768feac Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Thu, 30 Jun 2022 09:51:35 +0200 Subject: [PATCH 0539/1056] ASoC: tas2764: Fix amp gain register offset & default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1c4f29ec878bbf1cc0a1eb54ae7da5ff98e19641 ] The register default is 0x28 per the datasheet, and the amp gain field is supposed to be shifted left by one. With the wrong default, the ALSA controls lie about the power-up state. With the wrong shift, we get only half the gain we expect. Signed-off-by: Hector Martin Fixes: 827ed8a0fa50 ("ASoC: tas2764: Add the driver for the TAS2764") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20220630075135.2221-4-povik+lin@cutebit.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index bd79bc7ecf6bb6..ec13ba01e5223e 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -541,7 +541,7 @@ static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10050, 50, 1); static const struct snd_kcontrol_new tas2764_snd_controls[] = { SOC_SINGLE_TLV("Speaker Volume", TAS2764_DVC, 0, TAS2764_DVC_MAX, 1, tas2764_playback_volume), - SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 0, 0x14, 0, + SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 1, 0x14, 0, tas2764_digital_tlv), }; @@ -566,7 +566,7 @@ static const struct reg_default tas2764_reg_defaults[] = { { TAS2764_SW_RST, 0x00 }, { TAS2764_PWR_CTRL, 0x1a }, { TAS2764_DVC, 0x00 }, - { TAS2764_CHNL_0, 0x00 }, + { TAS2764_CHNL_0, 0x28 }, { TAS2764_TDM_CFG0, 0x09 }, { TAS2764_TDM_CFG1, 0x02 }, { TAS2764_TDM_CFG2, 0x0a }, From 0d083ea282e5940add6050f20708cd74323c99c2 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 30 Jun 2022 09:56:37 +0300 Subject: [PATCH 0540/1056] ASoC: Intel: Skylake: Correct the ssp rate discovery in skl_get_ssp_clks() [ Upstream commit 219af251bd1694bce1f627d238347d2eaf13de61 ] The present flag is only set once when one rate has been found to be saved. This will effectively going to ignore any rate discovered at later time and based on the code, this is not the intention. Fixes: bc2bd45b1f7f3 ("ASoC: Intel: Skylake: Parse nhlt and register clock device") Signed-off-by: Peter Ujfalusi Reviewed-by: Cezary Rojewski Link: https://lore.kernel.org/r/20220630065638.11183-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/skylake/skl-nhlt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index 64226072f0ee25..cb02ec2557285d 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -201,7 +201,6 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, struct nhlt_fmt_cfg *fmt_cfg; struct wav_fmt_ext *wav_fmt; unsigned long rate; - bool present = false; int rate_index = 0; u16 channels, bps; u8 clk_src; @@ -215,6 +214,8 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, return; for (i = 0; i < fmt->fmt_count; i++) { + bool present = false; + fmt_cfg = &fmt->fmt_config[i]; wav_fmt = &fmt_cfg->fmt_ext; From 750a5e2e5368bdfc5f9b4ea3b7055e0151d5c920 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 30 Jun 2022 09:56:38 +0300 Subject: [PATCH 0541/1056] ASoC: Intel: Skylake: Correct the handling of fmt_config flexible array [ Upstream commit fc976f5629afb4160ee77798b14a693eac903ffd ] The struct nhlt_format's fmt_config is a flexible array, it must not be used as normal array. When moving to the next nhlt_fmt_cfg we need to take into account the data behind the ->config.caps (indicated by ->config.size). The logic of the code also changed: it is no longer saves the _last_ fmt_cfg for all found rates. Fixes: bc2bd45b1f7f3 ("ASoC: Intel: Skylake: Parse nhlt and register clock device") Signed-off-by: Peter Ujfalusi Reviewed-by: Cezary Rojewski Link: https://lore.kernel.org/r/20220630065638.11183-3-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/skylake/skl-nhlt.c | 37 ++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index cb02ec2557285d..74f60f5dfaefd6 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -213,11 +213,12 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, if (fmt->fmt_count == 0) return; + fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config; for (i = 0; i < fmt->fmt_count; i++) { + struct nhlt_fmt_cfg *saved_fmt_cfg = fmt_cfg; bool present = false; - fmt_cfg = &fmt->fmt_config[i]; - wav_fmt = &fmt_cfg->fmt_ext; + wav_fmt = &saved_fmt_cfg->fmt_ext; channels = wav_fmt->fmt.channels; bps = wav_fmt->fmt.bits_per_sample; @@ -235,12 +236,18 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, * derive the rate. */ for (j = i; j < fmt->fmt_count; j++) { - fmt_cfg = &fmt->fmt_config[j]; - wav_fmt = &fmt_cfg->fmt_ext; + struct nhlt_fmt_cfg *tmp_fmt_cfg = fmt_cfg; + + wav_fmt = &tmp_fmt_cfg->fmt_ext; if ((fs == wav_fmt->fmt.samples_per_sec) && - (bps == wav_fmt->fmt.bits_per_sample)) + (bps == wav_fmt->fmt.bits_per_sample)) { channels = max_t(u16, channels, wav_fmt->fmt.channels); + saved_fmt_cfg = tmp_fmt_cfg; + } + /* Move to the next nhlt_fmt_cfg */ + tmp_fmt_cfg = (struct nhlt_fmt_cfg *)(tmp_fmt_cfg->config.caps + + tmp_fmt_cfg->config.size); } rate = channels * bps * fs; @@ -256,8 +263,11 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, /* Fill rate and parent for sclk/sclkfs */ if (!present) { + struct nhlt_fmt_cfg *first_fmt_cfg; + + first_fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config; i2s_config_ext = (struct skl_i2s_config_blob_ext *) - fmt->fmt_config[0].config.caps; + first_fmt_cfg->config.caps; /* MCLK Divider Source Select */ if (is_legacy_blob(i2s_config_ext->hdr.sig)) { @@ -271,6 +281,9 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, parent = skl_get_parent_clk(clk_src); + /* Move to the next nhlt_fmt_cfg */ + fmt_cfg = (struct nhlt_fmt_cfg *)(fmt_cfg->config.caps + + fmt_cfg->config.size); /* * Do not copy the config data if there is no parent * clock available for this clock source select @@ -279,9 +292,9 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, continue; sclk[id].rate_cfg[rate_index].rate = rate; - sclk[id].rate_cfg[rate_index].config = fmt_cfg; + sclk[id].rate_cfg[rate_index].config = saved_fmt_cfg; sclkfs[id].rate_cfg[rate_index].rate = rate; - sclkfs[id].rate_cfg[rate_index].config = fmt_cfg; + sclkfs[id].rate_cfg[rate_index].config = saved_fmt_cfg; sclk[id].parent_name = parent->name; sclkfs[id].parent_name = parent->name; @@ -295,13 +308,13 @@ static void skl_get_mclk(struct skl_dev *skl, struct skl_ssp_clk *mclk, { struct skl_i2s_config_blob_ext *i2s_config_ext; struct skl_i2s_config_blob_legacy *i2s_config; - struct nhlt_specific_cfg *fmt_cfg; + struct nhlt_fmt_cfg *fmt_cfg; struct skl_clk_parent_src *parent; u32 clkdiv, div_ratio; u8 clk_src; - fmt_cfg = &fmt->fmt_config[0].config; - i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->caps; + fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config; + i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->config.caps; /* MCLK Divider Source Select and divider */ if (is_legacy_blob(i2s_config_ext->hdr.sig)) { @@ -330,7 +343,7 @@ static void skl_get_mclk(struct skl_dev *skl, struct skl_ssp_clk *mclk, return; mclk[id].rate_cfg[0].rate = parent->rate/div_ratio; - mclk[id].rate_cfg[0].config = &fmt->fmt_config[0]; + mclk[id].rate_cfg[0].config = fmt_cfg; mclk[id].parent_name = parent->name; } From d5c315a787652c35045044877a249f7d5c8a4104 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 6 Jul 2022 09:39:13 +0100 Subject: [PATCH 0542/1056] net: stmmac: dwc-qos: Disable split header for Tegra194 [ Upstream commit 029c1c2059e9c4b38f97a06204cdecd10cfbeb8a ] There is a long-standing issue with the Synopsys DWC Ethernet driver for Tegra194 where random system crashes have been observed [0]. The problem occurs when the split header feature is enabled in the stmmac driver. In the bad case, a larger than expected buffer length is received and causes the calculation of the total buffer length to overflow. This results in a very large buffer length that causes the kernel to crash. Why this larger buffer length is received is not clear, however, the feedback from the NVIDIA design team is that the split header feature is not supported for Tegra194. Therefore, disable split header support for Tegra194 to prevent these random crashes from occurring. [0] https://lore.kernel.org/linux-tegra/b0b17697-f23e-8fa5-3757-604a86f3a095@nvidia.com/ Fixes: 67afd6d1cfdf ("net: stmmac: Add Split Header support and enable it in XGMAC cores") Signed-off-by: Jon Hunter Link: https://lore.kernel.org/r/20220706083913.13750-1-jonathanh@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index bc91fd867dcd46..358fc26f8d1fcd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -361,6 +361,7 @@ static int tegra_eqos_probe(struct platform_device *pdev, data->fix_mac_speed = tegra_eqos_fix_speed; data->init = tegra_eqos_init; data->bsp_priv = eqos; + data->sph_disable = 1; err = tegra_eqos_init(pdev, eqos); if (err < 0) From ad3014b0f6b28b9448543b1bdb57a0831dd4b818 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Wed, 6 Jul 2022 12:32:08 +0530 Subject: [PATCH 0543/1056] net: ethernet: ti: am65-cpsw: Fix devlink port register sequence [ Upstream commit 0680e20af5fbf41df8a11b11bd9a7c25b2ca0746 ] Renaming interfaces using udevd depends on the interface being registered before its netdev is registered. Otherwise, udevd reads an empty phys_port_name value, resulting in the interface not being renamed. Fix this by registering the interface before registering its netdev by invoking am65_cpsw_nuss_register_devlink() before invoking register_netdev() for the interface. Move the function call to devlink_port_type_eth_set(), invoking it after register_netdev() is invoked, to ensure that netlink notification for the port state change is generated after the netdev is completely initialized. Fixes: 58356eb31d60 ("net: ti: am65-cpsw-nuss: Add devlink support") Signed-off-by: Siddharth Vadapalli Link: https://lore.kernel.org/r/20220706070208.12207-1-s-vadapalli@ti.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index ea9d073e87fa69..901571c2626a19 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2467,7 +2467,6 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common) port->port_id, ret); goto dl_port_unreg; } - devlink_port_type_eth_set(dl_port, port->ndev); } return ret; @@ -2514,6 +2513,7 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common) static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) { struct device *dev = common->dev; + struct devlink_port *dl_port; struct am65_cpsw_port *port; int ret = 0, i; @@ -2530,6 +2530,10 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) return ret; } + ret = am65_cpsw_nuss_register_devlink(common); + if (ret) + return ret; + for (i = 0; i < common->port_num; i++) { port = &common->ports[i]; @@ -2542,25 +2546,24 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) i, ret); goto err_cleanup_ndev; } + + dl_port = &port->devlink_port; + devlink_port_type_eth_set(dl_port, port->ndev); } ret = am65_cpsw_register_notifiers(common); if (ret) goto err_cleanup_ndev; - ret = am65_cpsw_nuss_register_devlink(common); - if (ret) - goto clean_unregister_notifiers; - /* can't auto unregister ndev using devm_add_action() due to * devres release sequence in DD core for DMA */ return 0; -clean_unregister_notifiers: - am65_cpsw_unregister_notifiers(common); + err_cleanup_ndev: am65_cpsw_nuss_cleanup_ndev(common); + am65_cpsw_unregister_devlink(common); return ret; } From 279bf2a909e62124bf4a3b0faf735a5721e4c7f0 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:52 -0700 Subject: [PATCH 0544/1056] sysctl: Fix data races in proc_dointvec(). [ Upstream commit 1f1be04b4d48a2475ea1aab46a99221bfc5c0968 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_dointvec() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_dointvec() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0586047f73234a..11f0714273ab40 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -566,14 +566,14 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, if (*negp) { if (*lvalp > (unsigned long) INT_MAX + 1) return -EINVAL; - *valp = -*lvalp; + WRITE_ONCE(*valp, -*lvalp); } else { if (*lvalp > (unsigned long) INT_MAX) return -EINVAL; - *valp = *lvalp; + WRITE_ONCE(*valp, *lvalp); } } else { - int val = *valp; + int val = READ_ONCE(*valp); if (val < 0) { *negp = true; *lvalp = -(unsigned long)val; From d335db59f7fb3353f56e52371f1ee796ae9c8f09 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:53 -0700 Subject: [PATCH 0545/1056] sysctl: Fix data races in proc_douintvec(). [ Upstream commit 4762b532ec9539755aab61445d5da6e1926ccb99 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_douintvec() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_douintvec() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: e7d316a02f68 ("sysctl: handle error writing UINT_MAX to u32 fields") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 11f0714273ab40..b152e0a30a2b90 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -592,9 +592,9 @@ static int do_proc_douintvec_conv(unsigned long *lvalp, if (write) { if (*lvalp > UINT_MAX) return -EINVAL; - *valp = *lvalp; + WRITE_ONCE(*valp, *lvalp); } else { - unsigned int val = *valp; + unsigned int val = READ_ONCE(*valp); *lvalp = (unsigned long)val; } return 0; From 32d7f8da824407889eafc07b0c8ee7920648b571 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:54 -0700 Subject: [PATCH 0546/1056] sysctl: Fix data races in proc_dointvec_minmax(). [ Upstream commit f613d86d014b6375a4085901de39406598121e35 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_dointvec_minmax() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_dointvec_minmax() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b152e0a30a2b90..f5134435fb9c0e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -988,7 +988,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, if ((param->min && *param->min > tmp) || (param->max && *param->max < tmp)) return -EINVAL; - *valp = tmp; + WRITE_ONCE(*valp, tmp); } return 0; From 40e0477a7371d101c55b69d9c32a7a1ed82ab5ea Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:55 -0700 Subject: [PATCH 0547/1056] sysctl: Fix data races in proc_douintvec_minmax(). [ Upstream commit 2d3b559df3ed39258737789aae2ae7973d205bc1 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_douintvec_minmax() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_douintvec_minmax() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: 61d9b56a8920 ("sysctl: add unsigned int range support") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f5134435fb9c0e..bdb2f17b723f5d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1054,7 +1054,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp, (param->max && *param->max < tmp)) return -ERANGE; - *valp = tmp; + WRITE_ONCE(*valp, tmp); } return 0; From dcdf3c3c587a35520caa7b0c44a53e42659af5b7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:56 -0700 Subject: [PATCH 0548/1056] sysctl: Fix data races in proc_doulongvec_minmax(). [ Upstream commit c31bcc8fb89fc2812663900589c6325ba35d9a65 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_doulongvec_minmax() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_doulongvec_minmax() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bdb2f17b723f5d..9a68da5e15515e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1281,9 +1281,9 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, err = -EINVAL; break; } - *i = val; + WRITE_ONCE(*i, val); } else { - val = convdiv * (*i) / convmul; + val = convdiv * READ_ONCE(*i) / convmul; if (!first) proc_put_char(&buffer, &left, '\t'); proc_put_long(&buffer, &left, val, false); From 67623d290d4595e51e688e8b1ebe24e59ae4c727 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:57 -0700 Subject: [PATCH 0549/1056] sysctl: Fix data races in proc_dointvec_jiffies(). [ Upstream commit e877820877663fbae8cb9582ea597a7230b94df3 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_dointvec_jiffies() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_dointvec_jiffies() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9a68da5e15515e..5be8108a9a45ee 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1364,9 +1364,12 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp, if (write) { if (*lvalp > INT_MAX / HZ) return 1; - *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ); + if (*negp) + WRITE_ONCE(*valp, -*lvalp * HZ); + else + WRITE_ONCE(*valp, *lvalp * HZ); } else { - int val = *valp; + int val = READ_ONCE(*valp); unsigned long lval; if (val < 0) { *negp = true; From 2dfff4b607c4b75339d3ec4adaa06dbf0a7261d9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:58 -0700 Subject: [PATCH 0550/1056] tcp: Fix a data-race around sysctl_tcp_max_orphans. [ Upstream commit 47e6ab24e8c6e3ca10ceb5835413f401f90de4bf ] While reading sysctl_tcp_max_orphans, it can be changed concurrently. So, we need to add READ_ONCE() to avoid a data-race. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f79b5a98888c1b..4ac53c8f0583a9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2714,7 +2714,8 @@ static void tcp_orphan_update(struct timer_list *unused) static bool tcp_too_many_orphans(int shift) { - return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans; + return READ_ONCE(tcp_orphan_cache) << shift > + READ_ONCE(sysctl_tcp_max_orphans); } bool tcp_check_oom(struct sock *sk, int shift) From 82d07170290d743819072d99c7bd42e466011244 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:59 -0700 Subject: [PATCH 0551/1056] inetpeer: Fix data-races around sysctl. [ Upstream commit 3d32edf1f3c38d3301f6434e56316f293466d7fb ] While reading inetpeer sysctl variables, they can be changed concurrently. So, we need to add READ_ONCE() to avoid data-races. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/inetpeer.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index da21dfce24d739..e9fed83e9b3cc5 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -141,16 +141,20 @@ static void inet_peer_gc(struct inet_peer_base *base, struct inet_peer *gc_stack[], unsigned int gc_cnt) { + int peer_threshold, peer_maxttl, peer_minttl; struct inet_peer *p; __u32 delta, ttl; int i; - if (base->total >= inet_peer_threshold) + peer_threshold = READ_ONCE(inet_peer_threshold); + peer_maxttl = READ_ONCE(inet_peer_maxttl); + peer_minttl = READ_ONCE(inet_peer_minttl); + + if (base->total >= peer_threshold) ttl = 0; /* be aggressive */ else - ttl = inet_peer_maxttl - - (inet_peer_maxttl - inet_peer_minttl) / HZ * - base->total / inet_peer_threshold * HZ; + ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ * + base->total / peer_threshold * HZ; for (i = 0; i < gc_cnt; i++) { p = gc_stack[i]; From a5796154b56fcc8f442a499fd3b7a17bf3d82fb1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:40:00 -0700 Subject: [PATCH 0552/1056] net: Fix data-races around sysctl_mem. [ Upstream commit 310731e2f1611d1d13aae237abcf8e66d33345d5 ] While reading .sysctl_mem, it can be changed concurrently. So, we need to add READ_ONCE() to avoid data-races. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index 7d49196a3880ef..96f51d4b164997 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1484,7 +1484,7 @@ void __sk_mem_reclaim(struct sock *sk, int amount); /* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ static inline long sk_prot_mem_limits(const struct sock *sk, int index) { - long val = sk->sk_prot->sysctl_mem[index]; + long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]); #if PAGE_SIZE > SK_MEM_QUANTUM val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; From 07b0caf8aeb9b82e6ecc6c292a3e47c7fcdb1148 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:40:01 -0700 Subject: [PATCH 0553/1056] cipso: Fix data-races around sysctl. [ Upstream commit dd44f04b9214adb68ef5684ae87a81ba03632250 ] While reading cipso sysctl variables, they can be changed concurrently. So, we need to add READ_ONCE() to avoid data-races. Fixes: 446fda4f2682 ("[NetLabel]: CIPSOv4 engine") Signed-off-by: Kuniyuki Iwashima Acked-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- Documentation/networking/ip-sysctl.rst | 2 +- net/ipv4/cipso_ipv4.c | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index d91ab28718d493..8d772484755af5 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1063,7 +1063,7 @@ cipso_cache_enable - BOOLEAN cipso_cache_bucket_size - INTEGER The CIPSO label cache consists of a fixed size hash table with each hash bucket containing a number of cache entries. This variable limits - the number of entries in each hash bucket; the larger the value the + the number of entries in each hash bucket; the larger the value is, the more CIPSO label mappings that can be cached. When the number of entries in a given hash bucket reaches this limit adding new entries causes the oldest entry in the bucket to be removed to make room. diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 099259fc826aa2..75ac1452534455 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -239,7 +239,7 @@ static int cipso_v4_cache_check(const unsigned char *key, struct cipso_v4_map_cache_entry *prev_entry = NULL; u32 hash; - if (!cipso_v4_cache_enabled) + if (!READ_ONCE(cipso_v4_cache_enabled)) return -ENOENT; hash = cipso_v4_map_cache_hash(key, key_len); @@ -296,13 +296,14 @@ static int cipso_v4_cache_check(const unsigned char *key, int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr) { + int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize); int ret_val = -EPERM; u32 bkt; struct cipso_v4_map_cache_entry *entry = NULL; struct cipso_v4_map_cache_entry *old_entry = NULL; u32 cipso_ptr_len; - if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) + if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0) return 0; cipso_ptr_len = cipso_ptr[1]; @@ -322,7 +323,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr, bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); - if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { + if (cipso_v4_cache[bkt].size < bkt_size) { list_add(&entry->list, &cipso_v4_cache[bkt].list); cipso_v4_cache[bkt].size += 1; } else { @@ -1199,7 +1200,8 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, /* This will send packets using the "optimized" format when * possible as specified in section 3.4.2.6 of the * CIPSO draft. */ - if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10) + if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 && + ret_val <= 10) tag_len = 14; else tag_len = 4 + ret_val; @@ -1603,7 +1605,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) * all the CIPSO validations here but it doesn't * really specify _exactly_ what we need to validate * ... so, just make it a sysctl tunable. */ - if (cipso_v4_rbm_strictvalid) { + if (READ_ONCE(cipso_v4_rbm_strictvalid)) { if (cipso_v4_map_lvl_valid(doi_def, tag[3]) < 0) { err_offset = opt_iter + 3; From e2828e8c605853f71267825c9415437c0a93e4f2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:40:02 -0700 Subject: [PATCH 0554/1056] icmp: Fix data-races around sysctl. [ Upstream commit 48d7ee321ea5182c6a70782aa186422a70e67e22 ] While reading icmp sysctl variables, they can be changed concurrently. So, we need to add READ_ONCE() to avoid data-races. Fixes: 4cdf507d5452 ("icmp: add a global rate limitation") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index b7e277d8a84d22..b5766b62ca97a2 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -261,11 +261,12 @@ bool icmp_global_allow(void) spin_lock(&icmp_global.lock); delta = min_t(u32, now - icmp_global.stamp, HZ); if (delta >= HZ / 50) { - incr = sysctl_icmp_msgs_per_sec * delta / HZ ; + incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ; if (incr) WRITE_ONCE(icmp_global.stamp, now); } - credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst); + credit = min_t(u32, icmp_global.credit + incr, + READ_ONCE(sysctl_icmp_msgs_burst)); if (credit) { /* We want to use a credit of one in average, but need to randomize * it for security reasons. From 9be8aac91960ea32fd0e874758c9afee665c57d2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:40:03 -0700 Subject: [PATCH 0555/1056] ipv4: Fix a data-race around sysctl_fib_sync_mem. [ Upstream commit 73318c4b7dbd0e781aaababff17376b2894745c0 ] While reading sysctl_fib_sync_mem, it can be changed concurrently. So, we need to add READ_ONCE() to avoid a data-race. Fixes: 9ab948a91b2c ("ipv4: Allow amount of dirty memory from fib resizing to be controllable") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index f7f74d5c14da66..a9cd9c2bd84eb0 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -497,7 +497,7 @@ static void tnode_free(struct key_vector *tn) tn = container_of(head, struct tnode, rcu)->kv; } - if (tnode_free_size >= sysctl_fib_sync_mem) { + if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) { tnode_free_size = 0; synchronize_rcu(); } From c0dd77cede0a28289d3979de44e40fe9a5afa088 Mon Sep 17 00:00:00 2001 From: Ryan Wanner Date: Thu, 7 Jul 2022 14:58:12 -0700 Subject: [PATCH 0556/1056] ARM: dts: at91: sama5d2: Fix typo in i2s1 node [ Upstream commit 2fdf15b50a46e366740df4cccbe2343269b4ff55 ] Fix typo in i2s1 causing errors in dt binding validation. Change assigned-parrents to assigned-clock-parents to match i2s0 node formatting. Fixes: 1ca81883c557 ("ARM: dts: at91: sama5d2: add nodes for I2S controllers") Signed-off-by: Ryan Wanner [claudiu.beznea: use imperative addressing in commit description, remove blank line after fixes tag, fix typo in commit message] Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220707215812.193008-1-Ryan.Wanner@microchip.com Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sama5d2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index de88eb4847185f..4c87c2aa8fc864 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -1125,7 +1125,7 @@ clocks = <&pmc PMC_TYPE_PERIPHERAL 55>, <&pmc PMC_TYPE_GCK 55>; clock-names = "pclk", "gclk"; assigned-clocks = <&pmc PMC_TYPE_CORE PMC_I2S1_MUX>; - assigned-parrents = <&pmc PMC_TYPE_GCK 55>; + assigned-clock-parents = <&pmc PMC_TYPE_GCK 55>; status = "disabled"; }; From f0ccff1c89ba3dcbf28d712aeab1724642af4042 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 8 Jul 2022 19:45:29 +0200 Subject: [PATCH 0557/1056] ARM: dts: sunxi: Fix SPI NOR campatible on Orange Pi Zero [ Upstream commit 884b66976a7279ee889ba885fe364244d50b79e7 ] The device tree should include generic "jedec,spi-nor" compatible, and a manufacturer-specific one. The macronix part is what is shipped on the boards that come with a flash chip. Fixes: 45857ae95478 ("ARM: dts: orange-pi-zero: add node for SPI NOR") Signed-off-by: Michal Suchanek Acked-by: Jernej Skrabec Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20220708174529.3360-1-msuchanek@suse.de Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts index f19ed981da9d92..3706216ffb40ba 100644 --- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts +++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts @@ -169,7 +169,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "mxicy,mx25l1606e", "winbond,w25q128"; + compatible = "mxicy,mx25l1606e", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <40000000>; }; From 1d49665a93b18c54bcf255966946252395d910e6 Mon Sep 17 00:00:00 2001 From: William Zhang Date: Fri, 8 Jul 2022 11:25:06 -0700 Subject: [PATCH 0558/1056] arm64: dts: broadcom: bcm4908: Fix timer node for BCM4906 SoC [ Upstream commit b4a544e415e9be33b37d9bfa9d9f9f4d13f553d6 ] The cpu mask value in interrupt property inherits from bcm4908.dtsi which sets to four cpus. Correct the value to two cpus for dual core BCM4906 SoC. Fixes: c8b404fb05dc ("arm64: dts: broadcom: bcm4908: add BCM4906 Netgear R8000P DTS files") Signed-off-by: William Zhang Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi index 66023d5535247d..d084c33d5ca823 100644 --- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi @@ -9,6 +9,14 @@ /delete-node/ cpu@3; }; + timer { + compatible = "arm,armv8-timer"; + interrupts = , + , + , + ; + }; + pmu { compatible = "arm,cortex-a53-pmu"; interrupts = , From dc2ec80bc949d3fdb4b4c6b848e7e25a2758272f Mon Sep 17 00:00:00 2001 From: William Zhang Date: Fri, 8 Jul 2022 11:25:07 -0700 Subject: [PATCH 0559/1056] arm64: dts: broadcom: bcm4908: Fix cpu node for smp boot [ Upstream commit 8bd582ae9a71d7f14c4e0c735b2eacaf7516d626 ] Add spin-table enable-method and cpu-release-addr properties for cpu0 node. This is required by all ARMv8 SoC. Otherwise some bootloader like u-boot can not update cpu-release-addr and linux fails to start up secondary cpus. Fixes: 2961f69f151c ("arm64: dts: broadcom: add BCM4908 and Asus GT-AC5300 early DTS files") Signed-off-by: William Zhang Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi index e8907d3fe2d119..e510a6961cf959 100644 --- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi @@ -29,6 +29,8 @@ device_type = "cpu"; compatible = "brcm,brahma-b53"; reg = <0x0>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0xfff8>; next-level-cache = <&l2>; }; From b97aa619a3cbe334d52843e728e9c1b25c0f6794 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 5 Jul 2022 10:26:15 +0200 Subject: [PATCH 0560/1056] netfilter: nf_log: incorrect offset to network header [ Upstream commit 7a847c00eeba9744353ecdfad253143b9115678a ] NFPROTO_ARP is expecting to find the ARP header at the network offset. In the particular case of ARP, HTYPE= field shows the initial bytes of the ethernet header destination MAC address. netdev out: IN= OUT=bridge0 MACSRC=c2:76:e5:71:e1:de MACDST=36:b0:4a:e2:72:ea MACPROTO=0806 ARP HTYPE=14000 PTYPE=0x4ae2 OPCODE=49782 NFPROTO_NETDEV egress hook is also expecting to find the IP headers at the network offset. Fixes: 35b9395104d5 ("netfilter: add generic ARP packet logger") Reported-by: Tom Yan Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_log_syslog.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c index 13234641cdb347..7000e069bc0760 100644 --- a/net/netfilter/nf_log_syslog.c +++ b/net/netfilter/nf_log_syslog.c @@ -61,7 +61,7 @@ dump_arp_packet(struct nf_log_buf *m, unsigned int logflags; struct arphdr _arph; - ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + ah = skb_header_pointer(skb, nhoff, sizeof(_arph), &_arph); if (!ah) { nf_log_buf_add(m, "TRUNCATED"); return; @@ -90,7 +90,7 @@ dump_arp_packet(struct nf_log_buf *m, ah->ar_pln != sizeof(__be32)) return; - ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp); + ap = skb_header_pointer(skb, nhoff + sizeof(_arph), sizeof(_arpp), &_arpp); if (!ap) { nf_log_buf_add(m, " INCOMPLETE [%zu bytes]", skb->len - sizeof(_arph)); @@ -144,7 +144,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf, nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix); - dump_arp_packet(m, loginfo, skb, 0); + dump_arp_packet(m, loginfo, skb, skb_network_offset(skb)); nf_log_buf_close(m); } @@ -829,7 +829,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf, if (in) dump_ipv4_mac_header(m, loginfo, skb); - dump_ipv4_packet(net, m, loginfo, skb, 0); + dump_ipv4_packet(net, m, loginfo, skb, skb_network_offset(skb)); nf_log_buf_close(m); } From 5a4bb158f4c638d1fc494d2730357cfb6f5a4eea Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 5 Jul 2022 11:41:59 +0200 Subject: [PATCH 0561/1056] netfilter: nf_tables: replace BUG_ON by element length check [ Upstream commit c39ba4de6b0a843bec5d46c2b6f2064428dada5e ] BUG_ON can be triggered from userspace with an element with a large userdata area. Replace it by length check and return EINVAL instead. Over time extensions have been growing in size. Pick a sufficiently old Fixes: tag to propagate this fix. Fixes: 7d7402642eaf ("netfilter: nf_tables: variable sized set element keys / data") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_tables.h | 14 +++--- net/netfilter/nf_tables_api.c | 72 ++++++++++++++++++++++--------- 2 files changed, 60 insertions(+), 26 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index bcfee89012a187..f56a1071c0052b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -642,18 +642,22 @@ static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) tmpl->len = sizeof(struct nft_set_ext); } -static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, - unsigned int len) +static inline int nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, + unsigned int len) { tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align); - BUG_ON(tmpl->len > U8_MAX); + if (tmpl->len > U8_MAX) + return -EINVAL; + tmpl->offset[id] = tmpl->len; tmpl->len += nft_set_ext_types[id].len + len; + + return 0; } -static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) +static inline int nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) { - nft_set_ext_add_length(tmpl, id, 0); + return nft_set_ext_add_length(tmpl, id, 0); } static inline void nft_set_ext_init(struct nft_set_ext *ext, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 65d96439e2be70..a32acf056e326a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5736,8 +5736,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL)) return -EINVAL; - if (flags != 0) - nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (flags != 0) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (err < 0) + return err; + } if (set->flags & NFT_SET_MAP) { if (nla[NFTA_SET_ELEM_DATA] == NULL && @@ -5846,7 +5849,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_set_elem_expr; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + if (err < 0) + goto err_parse_key; } if (nla[NFTA_SET_ELEM_KEY_END]) { @@ -5855,22 +5860,31 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_parse_key; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + if (err < 0) + goto err_parse_key_end; } if (timeout > 0) { - nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); - if (timeout != set->timeout) - nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); + if (err < 0) + goto err_parse_key_end; + + if (timeout != set->timeout) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); + if (err < 0) + goto err_parse_key_end; + } } if (num_exprs) { for (i = 0; i < num_exprs; i++) size += expr_array[i]->ops->size; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS, - sizeof(struct nft_set_elem_expr) + - size); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS, + sizeof(struct nft_set_elem_expr) + size); + if (err < 0) + goto err_parse_key_end; } if (nla[NFTA_SET_ELEM_OBJREF] != NULL) { @@ -5885,7 +5899,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = PTR_ERR(obj); goto err_parse_key_end; } - nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); + if (err < 0) + goto err_parse_key_end; } if (nla[NFTA_SET_ELEM_DATA] != NULL) { @@ -5919,7 +5935,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, NFT_VALIDATE_NEED); } - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len); + if (err < 0) + goto err_parse_data; } /* The full maximum length of userdata can exceed the maximum @@ -5929,9 +5947,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, ulen = 0; if (nla[NFTA_SET_ELEM_USERDATA] != NULL) { ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]); - if (ulen > 0) - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA, - ulen); + if (ulen > 0) { + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA, + ulen); + if (err < 0) + goto err_parse_data; + } } err = -ENOMEM; @@ -6157,8 +6178,11 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, nft_set_ext_prepare(&tmpl); - if (flags != 0) - nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (flags != 0) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (err < 0) + return err; + } if (nla[NFTA_SET_ELEM_KEY]) { err = nft_setelem_parse_key(ctx, set, &elem.key.val, @@ -6166,16 +6190,20 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) return err; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + if (err < 0) + goto fail_elem; } if (nla[NFTA_SET_ELEM_KEY_END]) { err = nft_setelem_parse_key(ctx, set, &elem.key_end.val, nla[NFTA_SET_ELEM_KEY_END]); if (err < 0) - return err; + goto fail_elem; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + if (err < 0) + goto fail_elem_key_end; } err = -ENOMEM; @@ -6183,7 +6211,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, elem.key_end.val.data, NULL, 0, 0, GFP_KERNEL); if (elem.priv == NULL) - goto fail_elem; + goto fail_elem_key_end; ext = nft_set_elem_ext(set, elem.priv); if (flags) @@ -6207,6 +6235,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, kfree(trans); fail_trans: kfree(elem.priv); +fail_elem_key_end: + nft_data_release(&elem.key_end.val, NFT_DATA_VALUE); fail_elem: nft_data_release(&elem.key.val, NFT_DATA_VALUE); return err; From f6e3ced9c60f3cab517cfb748572c26576573715 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 8 Jul 2022 11:41:06 +0300 Subject: [PATCH 0562/1056] drm/i915/gvt: IS_ERR() vs NULL bug in intel_gvt_update_reg_whitelist() [ Upstream commit e87197fbd137c888fd6c871c72fe7e89445dd015 ] The shmem_pin_map() function returns NULL, it doesn't return error pointers. Fixes: 97ea656521c8 ("drm/i915/gvt: Parse default state to update reg whitelist") Reviewed-by: Andrzej Hajda Signed-off-by: Dan Carpenter Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/Ysftoia2BPUyqVcD@kili Acked-by: Zhenyu Wang Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index c4118b8082682f..11971ee929f895 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -3115,9 +3115,9 @@ void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu) continue; vaddr = shmem_pin_map(engine->default_state); - if (IS_ERR(vaddr)) { - gvt_err("failed to map %s->default state, err:%zd\n", - engine->name, PTR_ERR(vaddr)); + if (!vaddr) { + gvt_err("failed to map %s->default state\n", + engine->name); return; } From 1e92426e2b3ac0c43e469250e7b50f2634d16ed3 Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Sun, 10 Jul 2022 19:05:22 -0400 Subject: [PATCH 0563/1056] xen/gntdev: Ignore failure to unmap INVALID_GRANT_HANDLE [ Upstream commit 166d3863231667c4f64dee72b77d1102cdfad11f ] The error paths of gntdev_mmap() can call unmap_grant_pages() even though not all of the pages have been successfully mapped. This will trigger the WARN_ON()s in __unmap_grant_pages_done(). The number of warnings can be very large; I have observed thousands of lines of warnings in the systemd journal. Avoid this problem by only warning on unmapping failure if the handle being unmapped is not INVALID_GRANT_HANDLE. The handle field of any page that was not successfully mapped will be INVALID_GRANT_HANDLE, so this catches all cases where unmapping can legitimately fail. Fixes: dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()") Cc: stable@vger.kernel.org Suggested-by: Juergen Gross Signed-off-by: Demi Marie Obenour Reviewed-by: Oleksandr Tyshchenko Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220710230522.1563-1-demi@invisiblethingslab.com Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/gntdev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 4b56c39f766d4d..84b143eef395b1 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -396,13 +396,15 @@ static void __unmap_grant_pages_done(int result, unsigned int offset = data->unmap_ops - map->unmap_ops; for (i = 0; i < data->count; i++) { - WARN_ON(map->unmap_ops[offset+i].status); + WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay && + map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE); pr_debug("unmap handle=%d st=%d\n", map->unmap_ops[offset+i].handle, map->unmap_ops[offset+i].status); map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; if (use_ptemod) { - WARN_ON(map->kunmap_ops[offset+i].status); + WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay && + map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE); pr_debug("kunmap handle=%u st=%d\n", map->kunmap_ops[offset+i].handle, map->kunmap_ops[offset+i].status); From 9c3eef773cf4a8a18f959234bbb4c0a55c31ab71 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Jul 2022 14:30:13 -0400 Subject: [PATCH 0564/1056] lockd: set fl_owner when unlocking files [ Upstream commit aec158242b87a43d83322e99bc71ab4428e5ab79 ] Unlocking a POSIX lock on an inode with vfs_lock_file only works if the owner matches. Ensure we set it in the request. Cc: J. Bruce Fields Fixes: 7f024fcd5c97 ("Keep read and write fds with each nlm_file") Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/lockd/svcsubs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 0a22a2faf55224..b2f277727469cd 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -176,7 +176,7 @@ nlm_delete_file(struct nlm_file *file) } } -static int nlm_unlock_files(struct nlm_file *file) +static int nlm_unlock_files(struct nlm_file *file, fl_owner_t owner) { struct file_lock lock; @@ -184,6 +184,7 @@ static int nlm_unlock_files(struct nlm_file *file) lock.fl_type = F_UNLCK; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; + lock.fl_owner = owner; if (file->f_file[O_RDONLY] && vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, &lock, NULL)) goto out_err; @@ -225,7 +226,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, if (match(lockhost, host)) { spin_unlock(&flctx->flc_lock); - if (nlm_unlock_files(file)) + if (nlm_unlock_files(file, fl->fl_owner)) return 1; goto again; } From 62a6a708f3922e08908292bab29c83323ed5f0b4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Jul 2022 14:30:14 -0400 Subject: [PATCH 0565/1056] lockd: fix nlm_close_files [ Upstream commit 1197eb5906a5464dbaea24cac296dfc38499cc00 ] This loop condition tries a bit too hard to be clever. Just test for the two indices we care about explicitly. Cc: J. Bruce Fields Fixes: 7f024fcd5c97 ("Keep read and write fds with each nlm_file") Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/lockd/svcsubs.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index b2f277727469cd..e1c4617de77147 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -283,11 +283,10 @@ nlm_file_inuse(struct nlm_file *file) static void nlm_close_files(struct nlm_file *file) { - struct file *f; - - for (f = file->f_file[0]; f <= file->f_file[1]; f++) - if (f) - nlmsvc_ops->fclose(f); + if (file->f_file[O_RDONLY]) + nlmsvc_ops->fclose(file->f_file[O_RDONLY]); + if (file->f_file[O_WRONLY]) + nlmsvc_ops->fclose(file->f_file[O_WRONLY]); } /* From af515a63394241ea79497c94f0b1bf967a5bb01e Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 8 Jul 2022 17:09:52 -0700 Subject: [PATCH 0566/1056] tracing: Fix sleeping while atomic in kdb ftdump [ Upstream commit 495fcec8648cdfb483b5b9ab310f3839f07cb3b8 ] If you drop into kdb and type "ftdump" you'll get a sleeping while atomic warning from memory allocation in trace_find_next_entry(). This appears to have been caused by commit ff895103a84a ("tracing: Save off entry when peeking at next entry"), which added the allocation in that path. The problematic commit was already fixed by commit 8e99cf91b99b ("tracing: Do not allocate buffer in trace_find_next_entry() in atomic") but that fix missed the kdb case. The fix here is easy: just move the assignment of the static buffer to the place where it should have been to begin with: trace_init_global_iter(). That function is called in two places, once is right before the assignment of the static buffer added by the previous fix and once is in kdb. Note that it appears that there's a second static buffer that we need to assign that was added in commit efbbdaa22bb7 ("tracing: Show real address for trace event arguments"), so we'll move that too. Link: https://lkml.kernel.org/r/20220708170919.1.I75844e5038d9425add2ad853a608cb44bb39df40@changeid Fixes: ff895103a84a ("tracing: Save off entry when peeking at next entry") Fixes: efbbdaa22bb7 ("tracing: Show real address for trace event arguments") Signed-off-by: Douglas Anderson Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 518ce39a878df2..f752f2574630b0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9831,6 +9831,12 @@ void trace_init_global_iter(struct trace_iterator *iter) /* Output in nanoseconds only if we are using a clock in nanoseconds. */ if (trace_clocks[iter->tr->clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; + + /* Can not use kmalloc for iter.temp and iter.fmt */ + iter->temp = static_temp_buf; + iter->temp_size = STATIC_TEMP_BUF_SIZE; + iter->fmt = static_fmt_buf; + iter->fmt_size = STATIC_FMT_BUF_SIZE; } void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) @@ -9863,11 +9869,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) /* Simulate the iterator */ trace_init_global_iter(&iter); - /* Can not use kmalloc for iter.temp and iter.fmt */ - iter.temp = static_temp_buf; - iter.temp_size = STATIC_TEMP_BUF_SIZE; - iter.fmt = static_fmt_buf; - iter.fmt_size = STATIC_FMT_BUF_SIZE; for_each_tracing_cpu(cpu) { atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); From 40c12fc520234b0145bb776f38642507180dfad8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 8 Jul 2022 12:41:04 +0300 Subject: [PATCH 0567/1056] drm/i915/selftests: fix a couple IS_ERR() vs NULL tests [ Upstream commit 896dcabd1f8f613c533d948df17408c41f8929f5 ] The shmem_pin_map() function doesn't return error pointers, it returns NULL. Fixes: be1cb55a07bf ("drm/i915/gt: Keep a no-frills swappable copy of the default context state") Signed-off-by: Dan Carpenter Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220708094104.GL2316@kadam (cherry picked from commit d50f5a109cf4ed50c5b575c1bb5fc3bd17b23308) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index b0977a3b699b85..bc2950fbbaf938 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -153,8 +153,8 @@ static int live_lrc_layout(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); @@ -329,8 +329,8 @@ static int live_lrc_fixed(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); From f8ba02531476196f44a486df178b4f1fec178234 Mon Sep 17 00:00:00 2001 From: Bruce Chang Date: Sat, 29 Jan 2022 00:22:08 +0530 Subject: [PATCH 0568/1056] drm/i915/dg2: Add Wa_22011100796 [ Upstream commit 154cfae6158141b18d65abb0db679bb51a8294e7 ] Whenever Full soft reset is required, reset all individual engines first, and then do a full soft reset. Signed-off-by: Bruce Chang cc: Matt Roper Cc: Rodrigo Vivi Signed-off-by: Ramalingam C Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220128185209.18077-5-ramalingam.c@intel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_reset.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 91200c43951f7b..b6697c1d260a56 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -623,6 +623,15 @@ static int gen8_reset_engines(struct intel_gt *gt, */ } + /* + * Wa_22011100796:dg2, whenever Full soft reset is required, + * reset all individual engines firstly, and then do a full soft reset. + * + * This is best effort, so ignore any error from the initial reset. + */ + if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES) + gen11_reset_engines(gt, gt->info.engine_mask, 0); + if (GRAPHICS_VER(gt->i915) >= 11) ret = gen11_reset_engines(gt, engine_mask, retry); else From 0ee5874dad61d2b154a9e3db196fc33e8208ce1b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Jul 2022 16:21:32 +0100 Subject: [PATCH 0569/1056] drm/i915/gt: Serialize GRDOM access between multiple engine resets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b24dcf1dc507f69ed3b5c66c2b6a0209ae80d4d4 ] Don't allow two engines to be reset in parallel, as they would both try to select a reset bit (and send requests to common registers) and wait on that register, at the same time. Serialize control of the reset requests/acks using the uncore->lock, which will also ensure that no other GT state changes at the same time as the actual reset. Cc: stable@vger.kernel.org # v4.4 and upper Reported-by: Mika Kuoppala Signed-off-by: Chris Wilson Acked-by: Mika Kuoppala Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Acked-by: Thomas Hellström Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/e0a2d894e77aed7c2e36b0d1abdc7dbac3011729.1657639152.git.mchehab@kernel.org (cherry picked from commit 336561a914fc0c6f1218228718f633b31b7af1c3) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_reset.c | 37 ++++++++++++++++++++------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index b6697c1d260a56..18b0e57c58c1ef 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -293,9 +293,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) return err; } -static int gen6_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { static const u32 hw_engine_mask[] = { [RCS0] = GEN6_GRDOM_RENDER, @@ -322,6 +322,20 @@ static int gen6_reset_engines(struct intel_gt *gt, return gen6_hw_domain_reset(gt, hw_mask); } +static int gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(>->uncore->lock, flags); + ret = __gen6_reset_engines(gt, engine_mask, retry); + spin_unlock_irqrestore(>->uncore->lock, flags); + + return ret; +} + static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine) { int vecs_id; @@ -488,9 +502,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine) rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit); } -static int gen11_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen11_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { static const u32 hw_engine_mask[] = { [RCS0] = GEN11_GRDOM_RENDER, @@ -601,8 +615,11 @@ static int gen8_reset_engines(struct intel_gt *gt, struct intel_engine_cs *engine; const bool reset_non_ready = retry >= 1; intel_engine_mask_t tmp; + unsigned long flags; int ret; + spin_lock_irqsave(>->uncore->lock, flags); + for_each_engine_masked(engine, gt, engine_mask, tmp) { ret = gen8_engine_reset_prepare(engine); if (ret && !reset_non_ready) @@ -630,17 +647,19 @@ static int gen8_reset_engines(struct intel_gt *gt, * This is best effort, so ignore any error from the initial reset. */ if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES) - gen11_reset_engines(gt, gt->info.engine_mask, 0); + __gen11_reset_engines(gt, gt->info.engine_mask, 0); if (GRAPHICS_VER(gt->i915) >= 11) - ret = gen11_reset_engines(gt, engine_mask, retry); + ret = __gen11_reset_engines(gt, engine_mask, retry); else - ret = gen6_reset_engines(gt, engine_mask, retry); + ret = __gen6_reset_engines(gt, engine_mask, retry); skip_reset: for_each_engine_masked(engine, gt, engine_mask, tmp) gen8_engine_reset_cancel(engine); + spin_unlock_irqrestore(>->uncore->lock, flags); + return ret; } From 86062ca5edf1c2acc4de26452a34ba001e9b6a68 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Jul 2022 16:21:33 +0100 Subject: [PATCH 0570/1056] drm/i915/gt: Serialize TLB invalidates with GT resets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a1c5a7bf79c1faa5633b918b5c0666545e84c4d1 ] Avoid trying to invalidate the TLB in the middle of performing an engine reset, as this may result in the reset timing out. Currently, the TLB invalidate is only serialised by its own mutex, forgoing the uncore lock, but we can take the uncore->lock as well to serialise the mmio access, thereby serialising with the GDRST. Tested on a NUC5i7RYB, BIOS RYBDWi35.86A.0380.2019.0517.1530 with i915 selftest/hangcheck. Cc: stable@vger.kernel.org # v4.4 and upper Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store") Reported-by: Mauro Carvalho Chehab Tested-by: Mauro Carvalho Chehab Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Andi Shyti Acked-by: Thomas Hellström Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/1e59a7c45dd919a530256b9ac721ac6ea86c0677.1657639152.git.mchehab@kernel.org (cherry picked from commit 33da97894758737895e90c909f16786052680ef4) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_gt.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index e1e1d17d49fdd3..3a76000d15bfd1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -970,6 +970,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) mutex_lock(>->tlb_invalidate_lock); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + + for_each_engine(engine, gt, id) { + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + } + + spin_unlock_irq(&uncore->lock); + for_each_engine(engine, gt, id) { /* * HW architecture suggest typical invalidation time at 40us, @@ -984,7 +998,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; - intel_uncore_write_fw(uncore, rb.reg, rb.bit); if (__intel_wait_for_register_fw(uncore, rb.reg, rb.bit, 0, timeout_us, timeout_ms, From 60d1bb301ea5a4be3e1071d3d0c179140b270ef8 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 10 Dec 2021 16:07:54 -0800 Subject: [PATCH 0571/1056] drm/i915/uc: correctly track uc_fw init failure [ Upstream commit 35d4efec103e1afde968cfc9305f00f9aceb19cc ] The FAILURE state of uc_fw currently implies that the fw is loadable (i.e init completed), so we can't use it for init failures and instead need a dedicated error code. Note that this currently does not cause any issues because if we fail to init any of the firmwares we abort the load, but better be accurate anyway in case things change in the future. Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20211211000756.1698923-2-daniele.ceraolospurio@intel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 4 ++-- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 17 +++++++++++------ 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 76fe766ad1bc61..bb951b8d520335 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -159,6 +159,6 @@ int intel_guc_fw_upload(struct intel_guc *guc) return 0; out: - intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_FAIL); + intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL); return ret; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index fc5387b410a2bb..9ee22ac9254095 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -191,7 +191,7 @@ int intel_huc_auth(struct intel_huc *huc) fail: i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret); - intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_FAIL); + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL); return ret; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 3a16d08608a544..6be7fbf9d18a8f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -526,7 +526,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); - intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL); + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOAD_FAIL); return err; } @@ -544,7 +544,7 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw) if (err) { DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n", intel_uc_fw_type_repr(uc_fw->type), err); - intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL); + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_INIT_FAIL); } return err; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 99bb1fe1af6686..c1a7246fb7d6e1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -31,11 +31,12 @@ struct intel_gt; * | | MISSING <--/ | \--> ERROR | * | fetch | V | * | | AVAILABLE | - * +------------+- | -+ + * +------------+- | \ -+ + * | | | \--> INIT FAIL | * | init | V | * | | /------> LOADABLE <----<-----------\ | * +------------+- \ / \ \ \ -+ - * | | FAIL <--< \--> TRANSFERRED \ | + * | | LOAD FAIL <--< \--> TRANSFERRED \ | * | upload | \ / \ / | * | | \---------/ \--> RUNNING | * +------------+---------------------------------------------------+ @@ -49,8 +50,9 @@ enum intel_uc_fw_status { INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */ INTEL_UC_FIRMWARE_ERROR, /* invalid format or version */ INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */ + INTEL_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */ INTEL_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */ - INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */ + INTEL_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */ INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */ INTEL_UC_FIRMWARE_RUNNING /* init/auth done */ }; @@ -121,10 +123,12 @@ const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) return "ERROR"; case INTEL_UC_FIRMWARE_AVAILABLE: return "AVAILABLE"; + case INTEL_UC_FIRMWARE_INIT_FAIL: + return "INIT FAIL"; case INTEL_UC_FIRMWARE_LOADABLE: return "LOADABLE"; - case INTEL_UC_FIRMWARE_FAIL: - return "FAIL"; + case INTEL_UC_FIRMWARE_LOAD_FAIL: + return "LOAD FAIL"; case INTEL_UC_FIRMWARE_TRANSFERRED: return "TRANSFERRED"; case INTEL_UC_FIRMWARE_RUNNING: @@ -146,7 +150,8 @@ static inline int intel_uc_fw_status_to_error(enum intel_uc_fw_status status) return -ENOENT; case INTEL_UC_FIRMWARE_ERROR: return -ENOEXEC; - case INTEL_UC_FIRMWARE_FAIL: + case INTEL_UC_FIRMWARE_INIT_FAIL: + case INTEL_UC_FIRMWARE_LOAD_FAIL: return -EIO; case INTEL_UC_FIRMWARE_SELECTED: return -ESTALE; From a6cecaf058c48c6def2548473d814a2d54cb3667 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 21 Dec 2021 21:00:50 +0100 Subject: [PATCH 0572/1056] drm/i915: Require the vm mutex for i915_vma_bind() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c2ea703dcafccf18d7d77d8b68fb08c2d9842b7a ] Protect updates of struct i915_vma flags and async binding / unbinding with the vm::mutex. This means that i915_vma_bind() needs to assert vm::mutex held. In order to make that possible drop the caching of kmap_atomic() maps around i915_vma_bind(). An alternative would be to use kmap_local() but since we block cpu unplugging during sleeps inside kmap_local() sections this may have unwanted side-effects. Particularly since we might wait for gpu while holding the vm mutex. This change may theoretically increase execbuf cpu-usage on snb, but at least on non-highmem systems that increase should be very small. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20211221200050.436316-5-thomas.hellstrom@linux.intel.com Signed-off-by: Sasha Levin --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 50 ++++++++++++++++++- drivers/gpu/drm/i915/i915_vma.c | 1 + 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 1aa249908b645a..0d480867fc0c27 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1060,6 +1060,47 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) return &i915->ggtt; } +static void reloc_cache_unmap(struct reloc_cache *cache) +{ + void *vaddr; + + if (!cache->vaddr) + return; + + vaddr = unmask_page(cache->vaddr); + if (cache->vaddr & KMAP) + kunmap_atomic(vaddr); + else + io_mapping_unmap_atomic((void __iomem *)vaddr); +} + +static void reloc_cache_remap(struct reloc_cache *cache, + struct drm_i915_gem_object *obj) +{ + void *vaddr; + + if (!cache->vaddr) + return; + + if (cache->vaddr & KMAP) { + struct page *page = i915_gem_object_get_page(obj, cache->page); + + vaddr = kmap_atomic(page); + cache->vaddr = unmask_flags(cache->vaddr) | + (unsigned long)vaddr; + } else { + struct i915_ggtt *ggtt = cache_to_ggtt(cache); + unsigned long offset; + + offset = cache->node.start; + if (!drm_mm_node_allocated(&cache->node)) + offset += cache->page << PAGE_SHIFT; + + cache->vaddr = (unsigned long) + io_mapping_map_atomic_wc(&ggtt->iomap, offset); + } +} + static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb) { void *vaddr; @@ -1324,10 +1365,17 @@ eb_relocate_entry(struct i915_execbuffer *eb, * batchbuffers. */ if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && - GRAPHICS_VER(eb->i915) == 6) { + GRAPHICS_VER(eb->i915) == 6 && + !i915_vma_is_bound(target->vma, I915_VMA_GLOBAL_BIND)) { + struct i915_vma *vma = target->vma; + + reloc_cache_unmap(&eb->reloc_cache); + mutex_lock(&vma->vm->mutex); err = i915_vma_bind(target->vma, target->vma->obj->cache_level, PIN_GLOBAL, NULL); + mutex_unlock(&vma->vm->mutex); + reloc_cache_remap(&eb->reloc_cache, ev->vma->obj); if (err) return err; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index dfd20060812bcd..3df304edabc79a 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -376,6 +376,7 @@ int i915_vma_bind(struct i915_vma *vma, u32 bind_flags; u32 vma_flags; + lockdep_assert_held(&vma->vm->mutex); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(vma->size > vma->node.size); From 0260a9aa5d5c33c5a9f62cb41c0587b2efc389ef Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 11 Jul 2022 22:26:15 -0400 Subject: [PATCH 0573/1056] bnxt_en: Fix bnxt_reinit_after_abort() code path [ Upstream commit 4279414bff8af9898e8c53ae6c5bc17f68ad67b7 ] bnxt_reinit_after_abort() is called during ifup when a previous FW reset sequence has aborted or a previous ifup has failed after detecting FW reset. In all cases, it is safe to assume that a previous FW reset has completed and the driver may not have fully reinitialized. Prior to this patch, it is assumed that the FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE flag will always be set by the firmware in bnxt_hwrm_if_change(). This may not be true if the driver has already attempted to register with the firmware. The firmware may not set the RESET_DONE flag again after the driver has registered, assuming that the driver has seen the flag already. Fix it to always go through the FW reset initialization path if the BNXT_STATE_FW_RESET_DET flag is set. This flag is always set by the driver after successfully going through bnxt_reinit_after_abort(). Fixes: 6882c36cf82e ("bnxt_en: attempt to reinitialize after aborted reset") Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cb53149455890b..6962abe2358b9d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9806,7 +9806,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE) resc_reinit = true; - if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE) + if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE || + test_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) fw_reset = true; else if (bp->fw_health && !bp->fw_health->status_reliable) bnxt_try_map_fw_health_reg(bp); From 3852f048be6017c89bcd9b5fc69cba540e9bfd75 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 11 Jul 2022 22:26:18 -0400 Subject: [PATCH 0574/1056] bnxt_en: Fix bnxt_refclk_read() [ Upstream commit ddde5412fdaa5048bbca31529d46cb8da882870c ] The upper 32-bit PHC register is not latched when reading the lower 32-bit PHC register. Current code leaves a small window where we may not read correct higher order bits if the lower order bits are just about to wrap around. This patch fixes this by reading higher order bits twice and makes sure that final value is correctly paired with its lower 32 bits. Fixes: 30e96f487f64 ("bnxt_en: Do not read the PTP PHC during chip reset") Cc: Richard Cochran Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 62a931de5b1a74..a78cc65a38f2f7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -61,14 +61,23 @@ static int bnxt_refclk_read(struct bnxt *bp, struct ptp_system_timestamp *sts, u64 *ns) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + u32 high_before, high_now, low; if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) return -EIO; + high_before = readl(bp->bar0 + ptp->refclk_mapped_regs[1]); ptp_read_system_prets(sts); - *ns = readl(bp->bar0 + ptp->refclk_mapped_regs[0]); + low = readl(bp->bar0 + ptp->refclk_mapped_regs[0]); ptp_read_system_postts(sts); - *ns |= (u64)readl(bp->bar0 + ptp->refclk_mapped_regs[1]) << 32; + high_now = readl(bp->bar0 + ptp->refclk_mapped_regs[1]); + if (high_now != high_before) { + ptp_read_system_prets(sts); + low = readl(bp->bar0 + ptp->refclk_mapped_regs[0]); + ptp_read_system_postts(sts); + } + *ns = ((u64)high_now << 32) | low; + return 0; } From e58b02e445463065b4078bf621561da75197853f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:19 -0700 Subject: [PATCH 0575/1056] sysctl: Fix data-races in proc_dou8vec_minmax(). [ Upstream commit 7dee5d7747a69aa2be41f04c6a7ecfe3ac8cdf18 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_dou8vec_minmax() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_dou8vec_minmax() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: cb9444130662 ("sysctl: add proc_dou8vec_minmax()") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5be8108a9a45ee..357900d0cef9a4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1138,13 +1138,13 @@ int proc_dou8vec_minmax(struct ctl_table *table, int write, tmp.maxlen = sizeof(val); tmp.data = &val; - val = *data; + val = READ_ONCE(*data); res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos, do_proc_douintvec_minmax_conv, ¶m); if (res) return res; if (write) - *data = val; + WRITE_ONCE(*data, val); return 0; } EXPORT_SYMBOL_GPL(proc_dou8vec_minmax); From a716a3846c5ff32982ab1cc1ab27592221621958 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:20 -0700 Subject: [PATCH 0576/1056] sysctl: Fix data-races in proc_dointvec_ms_jiffies(). [ Upstream commit 7d1025e559782b58824b36cb8ad547a69f2e4b31 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_dointvec_ms_jiffies() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_dointvec_ms_jiffies() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 357900d0cef9a4..79cbfd0fa3be92 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1415,9 +1415,9 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp, if (jif > INT_MAX) return 1; - *valp = (int)jif; + WRITE_ONCE(*valp, (int)jif); } else { - int val = *valp; + int val = READ_ONCE(*valp); unsigned long lval; if (val < 0) { *negp = true; @@ -1485,8 +1485,8 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, * @ppos: the current position in the file * * Reads/writes up to table->maxlen/sizeof(unsigned int) integer - * values from/to the user buffer, treated as an ASCII string. - * The values read are assumed to be in 1/1000 seconds, and + * values from/to the user buffer, treated as an ASCII string. + * The values read are assumed to be in 1/1000 seconds, and * are converted into jiffies. * * Returns 0 on success. From cce955efa0ab81f7fb72e22beed372054c86005c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:23 -0700 Subject: [PATCH 0577/1056] icmp: Fix data-races around sysctl_icmp_echo_enable_probe. [ Upstream commit 4a2f7083cc6cb72dade9a63699ca352fad26d1cd ] While reading sysctl_icmp_echo_enable_probe, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: d329ea5bd884 ("icmp: add response to RFC 8335 PROBE messages") Fixes: 1fd07f33c3ea ("ipv6: ICMPV6: add response to ICMPV6 RFC 8335 PROBE messages") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 2 +- net/ipv6/icmp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index b5766b62ca97a2..91be44180dd558 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1034,7 +1034,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) u16 ident_len; u8 status; - if (!net->ipv4.sysctl_icmp_echo_enable_probe) + if (!READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe)) return false; /* We currently only support probing interfaces on the proxy node diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 96c5cc0f30cebb..716e7717fe8fee 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -927,7 +927,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_EXT_ECHO_REQUEST: if (!net->ipv6.sysctl.icmpv6_echo_ignore_all && - net->ipv4.sysctl_icmp_echo_enable_probe) + READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe)) icmpv6_echo_reply(skb); break; From 44021c2929cf5f6afb10c606cacde4256cf03205 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:25 -0700 Subject: [PATCH 0578/1056] icmp: Fix a data-race around sysctl_icmp_ignore_bogus_error_responses. [ Upstream commit b04f9b7e85c7d7aecbada620e8759a662af068d3 ] While reading sysctl_icmp_ignore_bogus_error_responses, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 91be44180dd558..6f444b2b7d1a32 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -939,7 +939,7 @@ static bool icmp_unreach(struct sk_buff *skb) * get the other vendor to fix their kit. */ - if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && + if (!READ_ONCE(net->ipv4.sysctl_icmp_ignore_bogus_error_responses) && inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) { net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n", &ip_hdr(skb)->saddr, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 6f1e64d4923287..51863031b17868 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -639,6 +639,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_errors_use_inbound_ifaddr", From de9490c32bc10020efdd1509689a28f197d6dfb8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:26 -0700 Subject: [PATCH 0579/1056] icmp: Fix a data-race around sysctl_icmp_errors_use_inbound_ifaddr. [ Upstream commit d2efabce81db7eed1c98fa1a3f203f0edd738ac3 ] While reading sysctl_icmp_errors_use_inbound_ifaddr, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1c2fb7f93cb2 ("[IPV4]: Sysctl configurable icmp error source address.") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 6f444b2b7d1a32..e40a79bb0a6ec0 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -702,7 +702,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, rcu_read_lock(); if (rt_is_input_route(rt) && - net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) + READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)) dev = dev_get_by_index_rcu(net, inet_iif(skb_in)); if (dev) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 51863031b17868..49a8167dda875c 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -648,6 +648,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_ratelimit", From 7c1b6e453481710d6bed76a490ee13996e835823 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:27 -0700 Subject: [PATCH 0580/1056] icmp: Fix a data-race around sysctl_icmp_ratelimit. [ Upstream commit 2a4eb714841f288cf51c7d942d98af6a8c6e4b01 ] While reading sysctl_icmp_ratelimit, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e40a79bb0a6ec0..3a36c198965d9e 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -328,7 +328,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, vif = l3mdev_master_ifindex(dst->dev); peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); - rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); + rc = inet_peer_xrlim_allow(peer, + READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); if (peer) inet_putpeer(peer); out: From 8d7a13654a988a8433bfbe3474f83b82c0dea6ab Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:28 -0700 Subject: [PATCH 0581/1056] icmp: Fix a data-race around sysctl_icmp_ratemask. [ Upstream commit 1ebcb25ad6fc3d50fca87350acf451b9a66dd31e ] While reading sysctl_icmp_ratemask, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 3a36c198965d9e..a5cc89506c1e49 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -290,7 +290,7 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code) return true; /* Limit if icmp type is enabled in ratemask. */ - if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask)) + if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask))) return true; return false; From 46e9c46203fd4676720ddca0fef7eff26826648e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:29 -0700 Subject: [PATCH 0582/1056] raw: Fix a data-race around sysctl_raw_l3mdev_accept. [ Upstream commit 1dace014928e6e385363032d359a04dee9158af0 ] While reading sysctl_raw_l3mdev_accept, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 6897445fb194 ("net: provide a sysctl raw_l3mdev_accept for raw socket lookup with VRFs") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/raw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/raw.h b/include/net/raw.h index 8ad8df59485364..c51a635671a73d 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -75,7 +75,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept, + return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); From 8bcf7339f2cf70ea4461df6ea045d1aadfabfa11 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:31 -0700 Subject: [PATCH 0583/1056] tcp: Fix a data-race around sysctl_tcp_ecn_fallback. [ Upstream commit 12b8d9ca7e678abc48195294494f1815b555d658 ] While reading sysctl_tcp_ecn_fallback, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 492135557dc0 ("tcp: add rfc3168, section 6.1.1.1. fallback") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/sysctl_net_ipv4.c | 2 ++ net/ipv4/tcp_output.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 49a8167dda875c..616658e7c79682 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -696,6 +696,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "ip_dynaddr", diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index dc3b4668fcde9d..509aab1b7ac994 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -346,7 +346,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) { - if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) /* tp->ecn_flags are cleared at a later point in time when * SYN ACK is ultimatively being received. */ From 6b26fb2fe227317f6afdc8ffbdbe7819aecce643 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:32 -0700 Subject: [PATCH 0584/1056] ipv4: Fix data-races around sysctl_ip_dynaddr. [ Upstream commit e49e4aff7ec19b2d0d0957ee30e93dade57dab9e ] While reading sysctl_ip_dynaddr, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- Documentation/networking/ip-sysctl.rst | 2 +- net/ipv4/af_inet.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 8d772484755af5..b8b67041f95521 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1157,7 +1157,7 @@ ip_autobind_reuse - BOOLEAN option should only be set by experts. Default: 0 -ip_dynaddr - BOOLEAN +ip_dynaddr - INTEGER If set non-zero, enables support for dynamic addresses. If set to a non-zero value larger than 1, a kernel log message will be printed when dynamic address rewriting diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 77534b44b8c7ca..44f21278003d17 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1251,7 +1251,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (new_saddr == old_saddr) return 0; - if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) { + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) { pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", __func__, &old_saddr, &new_saddr); } @@ -1306,7 +1306,7 @@ int inet_sk_rebuild_header(struct sock *sk) * Other protocols have to map its equivalent state to TCP_SYN_SENT. * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme */ - if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr || + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) || sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) From 0d17723afea3ae8c9f245c9bbd2ba5945b77e812 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:33 -0700 Subject: [PATCH 0585/1056] nexthop: Fix data-races around nexthop_compat_mode. [ Upstream commit bdf00bf24bef9be1ca641a6390fd5487873e0d2e ] While reading nexthop_compat_mode, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 4f80116d3df3 ("net: ipv4: add sysctl for nexthop api compatibility mode") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/fib_semantics.c | 2 +- net/ipv4/nexthop.c | 5 +++-- net/ipv6/route.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a98350dacbc3fd..674694d8ac61db 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1829,7 +1829,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; if (nexthop_is_blackhole(fi->nh)) rtm->rtm_type = RTN_BLACKHOLE; - if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode) + if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode)) goto offload; } diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 5dbd4b5505ebaa..cc8f120149f6b6 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1857,7 +1857,7 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) /* __ip6_del_rt does a release, so do a hold here */ fib6_info_hold(f6i); ipv6_stub->ip6_del_rt(net, f6i, - !net->ipv4.sysctl_nexthop_compat_mode); + !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); } } @@ -2362,7 +2362,8 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh, if (!rc) { nh_base_seq_inc(net); nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo); - if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode) + if (replace_notify && + READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)) nexthop_replace_notify(net, new_nh, &cfg->nlinfo); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4ca754c360a354..27274fc3619ab5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5756,7 +5756,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (nexthop_is_blackhole(rt->nh)) rtm->rtm_type = RTN_BLACKHOLE; - if (net->ipv4.sysctl_nexthop_compat_mode && + if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) && rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0) goto nla_put_failure; From 4225a78eb4d75b88831b3038947fd659763926cb Mon Sep 17 00:00:00 2001 From: Liang He Date: Tue, 12 Jul 2022 14:14:17 +0800 Subject: [PATCH 0586/1056] net: ftgmac100: Hold reference returned by of_get_child_by_name() [ Upstream commit 49b9f431ff0d845a36be0b3ede35ec324f2e5fee ] In ftgmac100_probe(), we should hold the refernece returned by of_get_child_by_name() and use it to call of_node_put() for reference balance. Fixes: 39bfab8844a0 ("net: ftgmac100: Add support for DT phy-handle property") Signed-off-by: Liang He Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/faraday/ftgmac100.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 0b833572205f39..4a2dadb91f0249 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1746,6 +1746,19 @@ static int ftgmac100_setup_clk(struct ftgmac100 *priv) return rc; } +static bool ftgmac100_has_child_node(struct device_node *np, const char *name) +{ + struct device_node *child_np = of_get_child_by_name(np, name); + bool ret = false; + + if (child_np) { + ret = true; + of_node_put(child_np); + } + + return ret; +} + static int ftgmac100_probe(struct platform_device *pdev) { struct resource *res; @@ -1865,7 +1878,7 @@ static int ftgmac100_probe(struct platform_device *pdev) /* Display what we found */ phy_attached_info(phy); - } else if (np && !of_get_child_by_name(np, "mdio")) { + } else if (np && !ftgmac100_has_child_node(np, "mdio")) { /* Support legacy ASPEED devicetree descriptions that decribe a * MAC with an embedded MDIO controller but have no "mdio" * child node. Automatically scan the MDIO bus for available From f4bd3202a2b4194ab6c0ce61628095d54f994db4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 12 Jul 2022 17:42:25 +0300 Subject: [PATCH 0587/1056] net: stmmac: fix leaks in probe [ Upstream commit 23aa6d5088e3bd65de77c5c307237b9937f8b48a ] These two error paths should clean up before returning. Fixes: 2bb4b98b60d7 ("net: stmmac: Add Ingenic SoCs MAC support.") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 9a6d819b84aead..378b4dd826bb5e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -273,7 +273,8 @@ static int ingenic_mac_probe(struct platform_device *pdev) mac->tx_delay = tx_delay_ps * 1000; } else { dev_err(&pdev->dev, "Invalid TX clock delay: %dps\n", tx_delay_ps); - return -EINVAL; + ret = -EINVAL; + goto err_remove_config_dt; } } @@ -283,7 +284,8 @@ static int ingenic_mac_probe(struct platform_device *pdev) mac->rx_delay = rx_delay_ps * 1000; } else { dev_err(&pdev->dev, "Invalid RX clock delay: %dps\n", rx_delay_ps); - return -EINVAL; + ret = -EINVAL; + goto err_remove_config_dt; } } From 2340428c90d43472f317125a69dace12e80927b6 Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Wed, 13 Jul 2022 15:21:11 +0800 Subject: [PATCH 0588/1056] ima: force signature verification when CONFIG_KEXEC_SIG is configured [ Upstream commit af16df54b89dee72df253abc5e7b5e8a6d16c11c ] Currently, an unsigned kernel could be kexec'ed when IMA arch specific policy is configured unless lockdown is enabled. Enforce kernel signature verification check in the kexec_file_load syscall when IMA arch specific policy is configured. Fixes: 99d5cadfde2b ("kexec_file: split KEXEC_VERIFY_SIG into KEXEC_SIG and KEXEC_SIG_FORCE") Reported-and-suggested-by: Mimi Zohar Signed-off-by: Coiby Xu Signed-off-by: Mimi Zohar Signed-off-by: Sasha Levin --- include/linux/kexec.h | 6 ++++++ kernel/kexec_file.c | 11 ++++++++++- security/integrity/ima/ima_efi.c | 2 ++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 33be67c2f7ff8c..cf042d41c87b9d 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -452,6 +452,12 @@ static inline int kexec_crash_loaded(void) { return 0; } #define kexec_in_progress false #endif /* CONFIG_KEXEC_CORE */ +#ifdef CONFIG_KEXEC_SIG +void set_kexec_sig_enforced(void); +#else +static inline void set_kexec_sig_enforced(void) {} +#endif + #endif /* !defined(__ASSEBMLY__) */ #endif /* LINUX_KEXEC_H */ diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index e289e60ba3adcd..f7a4fd4d243f47 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -29,6 +29,15 @@ #include #include "kexec_internal.h" +#ifdef CONFIG_KEXEC_SIG +static bool sig_enforce = IS_ENABLED(CONFIG_KEXEC_SIG_FORCE); + +void set_kexec_sig_enforced(void) +{ + sig_enforce = true; +} +#endif + static int kexec_calculate_store_digests(struct kimage *image); /* @@ -159,7 +168,7 @@ kimage_validate_signature(struct kimage *image) image->kernel_buf_len); if (ret) { - if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) { + if (sig_enforce) { pr_notice("Enforced kernel signature verification failed (%d).\n", ret); return ret; } diff --git a/security/integrity/ima/ima_efi.c b/security/integrity/ima/ima_efi.c index 71786d01946f45..9db66fe310d424 100644 --- a/security/integrity/ima/ima_efi.c +++ b/security/integrity/ima/ima_efi.c @@ -67,6 +67,8 @@ const char * const *arch_get_ima_policy(void) if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) { if (IS_ENABLED(CONFIG_MODULE_SIG)) set_module_sig_enforced(); + if (IS_ENABLED(CONFIG_KEXEC_SIG)) + set_kexec_sig_enforced(); return sb_arch_rules; } return NULL; From 601ae26aa2802a4c10c94d7388a99eabdbefab2b Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Tue, 12 Jul 2022 09:10:37 +0800 Subject: [PATCH 0589/1056] ima: Fix potential memory leak in ima_init_crypto() [ Upstream commit 067d2521874135267e681c19d42761c601d503d6 ] On failure to allocate the SHA1 tfm, IMA fails to initialize and exits without freeing the ima_algo_array. Add the missing kfree() for ima_algo_array to avoid the potential memory leak. Signed-off-by: Jianglei Nie Fixes: 6d94809af6b0 ("ima: Allocate and initialize tfm for each PCR bank") Signed-off-by: Mimi Zohar Signed-off-by: Sasha Levin --- security/integrity/ima/ima_crypto.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index a7206cc1d7d191..64499056648ad1 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -205,6 +205,7 @@ int __init ima_init_crypto(void) crypto_free_shash(ima_algo_array[i].tfm); } + kfree(ima_algo_array); out: crypto_free_shash(ima_shash_tfm); return rc; From cded1186f7e930045fb4ee17dbfa6bae41f3882c Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Mon, 11 Jul 2022 19:39:28 +0200 Subject: [PATCH 0590/1056] drm/amd/display: Only use depth 36 bpp linebuffers on DCN display engines. [ Upstream commit add61d3c31de6a4b5e11a2ab96aaf4c873481568 ] Various DCE versions had trouble with 36 bpp lb depth, requiring fixes, last time in commit 353ca0fa5630 ("drm/amd/display: Fix 10bit 4K display on CIK GPUs") for DCE-8. So far >= DCE-11.2 was considered ok, but now I found out that on DCE-11.2 it causes dithering when there shouldn't be any, so identity pixel passthrough with identity gamma LUTs doesn't work when it should. This breaks various important neuroscience applications, as reported to me by scientific users of Polaris cards under Ubuntu 22.04 with Linux 5.15, and confirmed by testing it myself on DCE-11.2. Lets only use depth 36 for DCN engines, where my testing showed that it is both necessary for high color precision output, e.g., RGBA16 fb's, and not harmful, as far as more than one year in real-world use showed. DCE engines seem to work fine for high precision output at 30 bpp, so this ("famous last words") depth 30 should hopefully fix all known problems without introducing new ones. Successfully retested on DCE-11.2 Polaris and DCN-1.0 Raven Ridge on top of Linux 5.19.0-rc2 + drm-next. Fixes: 353ca0fa5630 ("drm/amd/display: Fix 10bit 4K display on CIK GPUs") Signed-off-by: Mario Kleiner Tested-by: Mario Kleiner Cc: stable@vger.kernel.org # 5.14.0 Cc: Alex Deucher Cc: Harry Wentland Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 82f1f27baaf383..188556e41b9d55 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1062,12 +1062,13 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) * on certain displays, such as the Sharp 4k. 36bpp is needed * to support SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 and * SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616 with actual > 10 bpc - * precision on at least DCN display engines. However, at least - * Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth, - * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3 - * did not show such problems, so this seems to be the exception. + * precision on DCN display engines, but apparently not for DCE, as + * far as testing on DCE-11.2 and DCE-8 showed. Various DCE parts have + * problems: Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth, + * neither do DCE-8 at 4k resolution, or DCE-11.2 (broken identify pixel + * passthrough). Therefore only use 36 bpp on DCN where it is actually needed. */ - if (plane_state->ctx->dce_version > DCE_VERSION_11_0) + if (plane_state->ctx->dce_version > DCE_VERSION_MAX) pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP; else pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; From 8c37e7a2000d795aaad7256950f43c25f2aac67f Mon Sep 17 00:00:00 2001 From: Yefim Barashkin Date: Mon, 11 Jul 2022 14:35:11 -0800 Subject: [PATCH 0591/1056] drm/amd/pm: Prevent divide by zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0638c98c17aa12fe914459c82cd178247e21fb2b ] divide error: 0000 [#1] SMP PTI CPU: 3 PID: 78925 Comm: tee Not tainted 5.15.50-1-lts #1 Hardware name: MSI MS-7A59/Z270 SLI PLUS (MS-7A59), BIOS 1.90 01/30/2018 RIP: 0010:smu_v11_0_set_fan_speed_rpm+0x11/0x110 [amdgpu] Speed is user-configurable through a file. I accidentally set it to zero, and the driver crashed. Reviewed-by: Evan Quan Reviewed-by: André Almeida Signed-off-by: Yefim Barashkin Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index e6c93396434ff6..614c3d0495141a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1235,6 +1235,8 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t crystal_clock_freq = 2500; uint32_t tach_period; + if (speed == 0) + return -EINVAL; /* * To prevent from possible overheat, some ASICs may have requirement * for minimum fan speed: From 58d93e9d160c0de6d867c7eb4c2206671a351eb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Tue, 12 Jul 2022 08:26:42 +0200 Subject: [PATCH 0592/1056] sfc: fix use after free when disabling sriov MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ebe41da5d47ac0fff877e57bd14c54dccf168827 ] Use after free is detected by kfence when disabling sriov. What was read after being freed was vf->pci_dev: it was freed from pci_disable_sriov and later read in efx_ef10_sriov_free_vf_vports, called from efx_ef10_sriov_free_vf_vswitching. Set the pointer to NULL at release time to not trying to read it later. Reproducer and dmesg log (note that kfence doesn't detect it every time): $ echo 1 > /sys/class/net/enp65s0f0np0/device/sriov_numvfs $ echo 0 > /sys/class/net/enp65s0f0np0/device/sriov_numvfs BUG: KFENCE: use-after-free read in efx_ef10_sriov_free_vf_vswitching+0x82/0x170 [sfc] Use-after-free read at 0x00000000ff3c1ba5 (in kfence-#224): efx_ef10_sriov_free_vf_vswitching+0x82/0x170 [sfc] efx_ef10_pci_sriov_disable+0x38/0x70 [sfc] efx_pci_sriov_configure+0x24/0x40 [sfc] sriov_numvfs_store+0xfe/0x140 kernfs_fop_write_iter+0x11c/0x1b0 new_sync_write+0x11f/0x1b0 vfs_write+0x1eb/0x280 ksys_write+0x5f/0xe0 do_syscall_64+0x5c/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae kfence-#224: 0x00000000edb8ef95-0x00000000671f5ce1, size=2792, cache=kmalloc-4k allocated by task 6771 on cpu 10 at 3137.860196s: pci_alloc_dev+0x21/0x60 pci_iov_add_virtfn+0x2a2/0x320 sriov_enable+0x212/0x3e0 efx_ef10_sriov_configure+0x67/0x80 [sfc] efx_pci_sriov_configure+0x24/0x40 [sfc] sriov_numvfs_store+0xba/0x140 kernfs_fop_write_iter+0x11c/0x1b0 new_sync_write+0x11f/0x1b0 vfs_write+0x1eb/0x280 ksys_write+0x5f/0xe0 do_syscall_64+0x5c/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae freed by task 6771 on cpu 12 at 3170.991309s: device_release+0x34/0x90 kobject_cleanup+0x3a/0x130 pci_iov_remove_virtfn+0xd9/0x120 sriov_disable+0x30/0xe0 efx_ef10_pci_sriov_disable+0x57/0x70 [sfc] efx_pci_sriov_configure+0x24/0x40 [sfc] sriov_numvfs_store+0xfe/0x140 kernfs_fop_write_iter+0x11c/0x1b0 new_sync_write+0x11f/0x1b0 vfs_write+0x1eb/0x280 ksys_write+0x5f/0xe0 do_syscall_64+0x5c/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 3c5eb87605e85 ("sfc: create vports for VFs and assign random MAC addresses") Reported-by: Yanghang Liu Signed-off-by: Íñigo Huguet Acked-by: Martin Habets Link: https://lore.kernel.org/r/20220712062642.6915-1-ihuguet@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/sfc/ef10_sriov.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 752d6406f07ed0..f488461a23d1c5 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -408,8 +408,9 @@ static int efx_ef10_pci_sriov_enable(struct efx_nic *efx, int num_vfs) static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force) { struct pci_dev *dev = efx->pci_dev; + struct efx_ef10_nic_data *nic_data = efx->nic_data; unsigned int vfs_assigned = pci_vfs_assigned(dev); - int rc = 0; + int i, rc = 0; if (vfs_assigned && !force) { netif_info(efx, drv, efx->net_dev, "VFs are assigned to guests; " @@ -417,10 +418,13 @@ static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force) return -EBUSY; } - if (!vfs_assigned) + if (!vfs_assigned) { + for (i = 0; i < efx->vf_count; i++) + nic_data->vf[i].pci_dev = NULL; pci_disable_sriov(dev); - else + } else { rc = -EBUSY; + } efx_ef10_sriov_free_vf_vswitching(efx); efx->vf_count = 0; From 5464c8987ddfab587e14c00030a633c803b13d98 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 17 Jan 2022 14:32:12 -0500 Subject: [PATCH 0593/1056] ceph: switch netfs read ops to use rreq->inode instead of rreq->mapping->host [ Upstream commit a25cedb4313d35e1f2968105678a47ca28e84d3b ] One fewer pointer dereference, and in the future we may not be able to count on the mapping pointer being populated (e.g. in the DIO case). Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/addr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 99b80b5c7a931c..b218a26291b8e0 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -179,7 +179,7 @@ static int ceph_releasepage(struct page *page, gfp_t gfp) static void ceph_netfs_expand_readahead(struct netfs_read_request *rreq) { - struct inode *inode = rreq->mapping->host; + struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_layout *lo = &ci->i_layout; u32 blockoff; @@ -196,7 +196,7 @@ static void ceph_netfs_expand_readahead(struct netfs_read_request *rreq) static bool ceph_netfs_clamp_length(struct netfs_read_subrequest *subreq) { - struct inode *inode = subreq->rreq->mapping->host; + struct inode *inode = subreq->rreq->inode; struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u64 objno, objoff; @@ -242,7 +242,7 @@ static void finish_netfs_read(struct ceph_osd_request *req) static void ceph_netfs_issue_op(struct netfs_read_subrequest *subreq) { struct netfs_read_request *rreq = subreq->rreq; - struct inode *inode = rreq->mapping->host; + struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_osd_request *req; From 859081fb60e6257b11ea9ee243d85b80a669a7ba Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Tue, 12 Jul 2022 19:58:35 +0200 Subject: [PATCH 0594/1056] seg6: fix skb checksum evaluation in SRH encapsulation/insertion [ Upstream commit df8386d13ea280d55beee1b95f61a59234a3798b ] Support for SRH encapsulation and insertion was introduced with commit 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels"), through the seg6_do_srh_encap() and seg6_do_srh_inline() functions, respectively. The former encapsulates the packet in an outer IPv6 header along with the SRH, while the latter inserts the SRH between the IPv6 header and the payload. Then, the headers are initialized/updated according to the operating mode (i.e., encap/inline). Finally, the skb checksum is calculated to reflect the changes applied to the headers. The IPv6 payload length ('payload_len') is not initialized within seg6_do_srh_{inline,encap}() but is deferred in seg6_do_srh(), i.e. the caller of seg6_do_srh_{inline,encap}(). However, this operation invalidates the skb checksum, since the 'payload_len' is updated only after the checksum is evaluated. To solve this issue, the initialization of the IPv6 payload length is moved from seg6_do_srh() directly into the seg6_do_srh_{inline,encap}() functions and before the skb checksum update takes place. Fixes: 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels") Reported-by: Paolo Abeni Link: https://lore.kernel.org/all/20220705190727.69d532417be7438b15404ee1@uniroma2.it Signed-off-by: Andrea Mayer Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/seg6_iptunnel.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index d64855010948db..e756ba705fd9b4 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -189,6 +189,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) } #endif + hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, hdr, tot_len); return 0; @@ -241,6 +243,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) } #endif + hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); return 0; @@ -302,7 +306,6 @@ static int seg6_do_srh(struct sk_buff *skb) break; } - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); nf_reset_ct(skb); From 15e8b6274c490269fe16ce355d1105cf1f5c8131 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Tue, 12 Jul 2022 19:58:36 +0200 Subject: [PATCH 0595/1056] seg6: fix skb checksum in SRv6 End.B6 and End.B6.Encaps behaviors [ Upstream commit f048880fc77058d864aff5c674af7918b30f312a ] The SRv6 End.B6 and End.B6.Encaps behaviors rely on functions seg6_do_srh_{encap,inline}() to, respectively: i) encapsulate the packet within an outer IPv6 header with the specified Segment Routing Header (SRH); ii) insert the specified SRH directly after the IPv6 header of the packet. This patch removes the initialization of the IPv6 header payload length from the input_action_end_b6{_encap}() functions, as it is now handled properly by seg6_do_srh_{encap,inline}() to avoid corruption of the skb checksum. Fixes: 140f04c33bbc ("ipv6: sr: implement several seg6local actions") Signed-off-by: Andrea Mayer Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/seg6_local.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index ef88489c71f526..59454285d5c582 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -824,7 +824,6 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) if (err) goto drop; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_nexthop(skb, NULL, 0); @@ -856,7 +855,6 @@ static int input_action_end_b6_encap(struct sk_buff *skb, if (err) goto drop; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_nexthop(skb, NULL, 0); From 2d84fcb6e6f74c96eb03e6a1b9eed0a164e37b2c Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Tue, 12 Jul 2022 19:58:37 +0200 Subject: [PATCH 0596/1056] seg6: bpf: fix skb checksum in bpf_push_seg6_encap() [ Upstream commit 4889fbd98deaf243c3baadc54e296d71c6af1eb0 ] Both helper functions bpf_lwt_seg6_action() and bpf_lwt_push_encap() use the bpf_push_seg6_encap() to encapsulate the packet in an IPv6 with Segment Routing Header (SRH) or insert an SRH between the IPv6 header and the payload. To achieve this result, such helper functions rely on bpf_push_seg6_encap() which, in turn, leverages seg6_do_srh_{encap,inline}() to perform the required operation (i.e. encap/inline). This patch removes the initialization of the IPv6 header payload length from bpf_push_seg6_encap(), as it is now handled properly by seg6_do_srh_{encap,inline}() to prevent corruption of the skb checksum. Fixes: fe94cc290f53 ("bpf: Add IPv6 Segment Routing helpers") Signed-off-by: Andrea Mayer Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/core/filter.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index d1e2ef77ce4c15..8b2bc855714b5a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5851,7 +5851,6 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len if (err) return err; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); return seg6_lookup_nexthop(skb, NULL, 0); From 68e5f32f0de9594629ff9e599294d9801c6187de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Wed, 13 Jul 2022 11:21:16 +0200 Subject: [PATCH 0597/1056] sfc: fix kernel panic when creating VF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ada74c5539eba06cf8b47d068f92e0b3963a9a6e ] When creating VFs a kernel panic can happen when calling to efx_ef10_try_update_nic_stats_vf. When releasing a DMA coherent buffer, sometimes, I don't know in what specific circumstances, it has to unmap memory with vunmap. It is disallowed to do that in IRQ context or with BH disabled. Otherwise, we hit this line in vunmap, causing the crash: BUG_ON(in_interrupt()); This patch reenables BH to release the buffer. Log messages when the bug is hit: kernel BUG at mm/vmalloc.c:2727! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 6 PID: 1462 Comm: NetworkManager Kdump: loaded Tainted: G I --------- --- 5.14.0-119.el9.x86_64 #1 Hardware name: Dell Inc. PowerEdge R740/06WXJT, BIOS 2.8.2 08/27/2020 RIP: 0010:vunmap+0x2e/0x30 ...skip... Call Trace: __iommu_dma_free+0x96/0x100 efx_nic_free_buffer+0x2b/0x40 [sfc] efx_ef10_try_update_nic_stats_vf+0x14a/0x1c0 [sfc] efx_ef10_update_stats_vf+0x18/0x40 [sfc] efx_start_all+0x15e/0x1d0 [sfc] efx_net_open+0x5a/0xe0 [sfc] __dev_open+0xe7/0x1a0 __dev_change_flags+0x1d7/0x240 dev_change_flags+0x21/0x60 ...skip... Fixes: d778819609a2 ("sfc: DMA the VF stats only when requested") Reported-by: Ma Yuying Signed-off-by: Íñigo Huguet Acked-by: Edward Cree Link: https://lore.kernel.org/r/20220713092116.21238-1-ihuguet@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/sfc/ef10.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index c1cd1c97f09ddd..056c24ec1249a7 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1932,7 +1932,10 @@ static int efx_ef10_try_update_nic_stats_vf(struct efx_nic *efx) efx_update_sw_stats(efx, stats); out: + /* releasing a DMA coherent buffer with BH disabled can panic */ + spin_unlock_bh(&efx->stats_lock); efx_nic_free_buffer(efx, &stats_buf); + spin_lock_bh(&efx->stats_lock); return rc; } From 9b1cb795a7ab3d71ff9819db763d03827747c2ed Mon Sep 17 00:00:00 2001 From: "Chia-Lin Kao (AceLan)" Date: Wed, 13 Jul 2022 19:12:23 +0800 Subject: [PATCH 0598/1056] net: atlantic: remove deep parameter on suspend/resume functions [ Upstream commit 0f33250760384e05c36466b0a2f92f3c6007ba92 ] Below commit claims that atlantic NIC requires to reset the device on pm op, and had set the deep to true for all suspend/resume functions. commit 1809c30b6e5a ("net: atlantic: always deep reset on pm op, fixing up my null deref regression") So, we could remove deep parameter on suspend/resume functions without any functional change. Fixes: 1809c30b6e5a ("net: atlantic: always deep reset on pm op, fixing up my null deref regression") Signed-off-by: Chia-Lin Kao (AceLan) Link: https://lore.kernel.org/r/20220713111224.1535938-1-acelan.kao@canonical.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/aquantia/atlantic/aq_pci_func.c | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 831833911a5256..dbd5263130f9e6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -379,7 +379,7 @@ static void aq_pci_shutdown(struct pci_dev *pdev) } } -static int aq_suspend_common(struct device *dev, bool deep) +static int aq_suspend_common(struct device *dev) { struct aq_nic_s *nic = pci_get_drvdata(to_pci_dev(dev)); @@ -392,17 +392,15 @@ static int aq_suspend_common(struct device *dev, bool deep) if (netif_running(nic->ndev)) aq_nic_stop(nic); - if (deep) { - aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); - aq_nic_set_power(nic); - } + aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); + aq_nic_set_power(nic); rtnl_unlock(); return 0; } -static int atl_resume_common(struct device *dev, bool deep) +static int atl_resume_common(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct aq_nic_s *nic; @@ -415,10 +413,8 @@ static int atl_resume_common(struct device *dev, bool deep) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - if (deep) { - /* Reinitialize Nic/Vecs objects */ - aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); - } + /* Reinitialize Nic/Vecs objects */ + aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); if (netif_running(nic->ndev)) { ret = aq_nic_init(nic); @@ -444,22 +440,22 @@ static int atl_resume_common(struct device *dev, bool deep) static int aq_pm_freeze(struct device *dev) { - return aq_suspend_common(dev, true); + return aq_suspend_common(dev); } static int aq_pm_suspend_poweroff(struct device *dev) { - return aq_suspend_common(dev, true); + return aq_suspend_common(dev); } static int aq_pm_thaw(struct device *dev) { - return atl_resume_common(dev, true); + return atl_resume_common(dev); } static int aq_pm_resume_restore(struct device *dev) { - return atl_resume_common(dev, true); + return atl_resume_common(dev); } static const struct dev_pm_ops aq_pm_ops = { From 58c90993933e7a76305519bc70da8c0cd2907c1f Mon Sep 17 00:00:00 2001 From: "Chia-Lin Kao (AceLan)" Date: Wed, 13 Jul 2022 19:12:24 +0800 Subject: [PATCH 0599/1056] net: atlantic: remove aq_nic_deinit() when resume [ Upstream commit 2e15c51fefaffaf9f72255eaef4fada05055e4c5 ] aq_nic_deinit() has been called while suspending, so we don't have to call it again on resume. Actually, call it again leads to another hang issue when resuming from S3. Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992345] Call Trace: Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992346] Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992348] aq_nic_deinit+0xb4/0xd0 [atlantic] Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992356] aq_pm_thaw+0x7f/0x100 [atlantic] Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992362] pci_pm_resume+0x5c/0x90 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992366] ? pci_pm_thaw+0x80/0x80 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992368] dpm_run_callback+0x4e/0x120 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992371] device_resume+0xad/0x200 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992373] async_resume+0x1e/0x40 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992374] async_run_entry_fn+0x33/0x120 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992377] process_one_work+0x220/0x3c0 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992380] worker_thread+0x4d/0x3f0 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992382] ? process_one_work+0x3c0/0x3c0 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992384] kthread+0x12a/0x150 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992386] ? set_kthread_struct+0x40/0x40 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992387] ret_from_fork+0x22/0x30 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992391] Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992392] ---[ end trace 1ec8c79604ed5e0d ]--- Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992394] PM: dpm_run_callback(): pci_pm_resume+0x0/0x90 returns -110 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992397] atlantic 0000:02:00.0: PM: failed to resume async: error -110 Fixes: 1809c30b6e5a ("net: atlantic: always deep reset on pm op, fixing up my null deref regression") Signed-off-by: Chia-Lin Kao (AceLan) Link: https://lore.kernel.org/r/20220713111224.1535938-2-acelan.kao@canonical.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index dbd5263130f9e6..8647125d60aef1 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -413,9 +413,6 @@ static int atl_resume_common(struct device *dev) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - /* Reinitialize Nic/Vecs objects */ - aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); - if (netif_running(nic->ndev)) { ret = aq_nic_init(nic); if (ret) From a0706d7c142942dd891494c6fdd7d80f89363b66 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 8 Jul 2022 14:51:47 +0200 Subject: [PATCH 0600/1056] KVM: x86: Fully initialize 'struct kvm_lapic_irq' in kvm_pv_kick_cpu_op() [ Upstream commit 8a414f943f8b5f94bbaafdec863d6f3dbef33f8a ] 'vector' and 'trig_mode' fields of 'struct kvm_lapic_irq' are left uninitialized in kvm_pv_kick_cpu_op(). While these fields are normally not needed for APIC_DM_REMRD, they're still referenced by __apic_accept_irq() for trace_kvm_apic_accept_irq(). Fully initialize the structure to avoid consuming random stack memory. Fixes: a183b638b61c ("KVM: x86: make apic_accept_irq tracepoint more generic") Reported-by: syzbot+d6caa905917d353f0d07@syzkaller.appspotmail.com Signed-off-by: Vitaly Kuznetsov Reviewed-by: Sean Christopherson Message-Id: <20220708125147.593975-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8974884ef2ad52..732c3f2f8ded4d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8713,15 +8713,17 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, */ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) { - struct kvm_lapic_irq lapic_irq; - - lapic_irq.shorthand = APIC_DEST_NOSHORT; - lapic_irq.dest_mode = APIC_DEST_PHYSICAL; - lapic_irq.level = 0; - lapic_irq.dest_id = apicid; - lapic_irq.msi_redir_hint = false; + /* + * All other fields are unused for APIC_DM_REMRD, but may be consumed by + * common code, e.g. for tracing. Defer initialization to the compiler. + */ + struct kvm_lapic_irq lapic_irq = { + .delivery_mode = APIC_DM_REMRD, + .dest_mode = APIC_DEST_PHYSICAL, + .shorthand = APIC_DEST_NOSHORT, + .dest_id = apicid, + }; - lapic_irq.delivery_mode = APIC_DM_REMRD; kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } From 724ec407f9978ef890c3d585c5e1d93ef200339e Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 14 Jul 2022 10:07:54 +0300 Subject: [PATCH 0601/1056] net/tls: Check for errors in tls_device_init [ Upstream commit 3d8c51b25a235e283e37750943bbf356ef187230 ] Add missing error checks in tls_device_init. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Reported-by: Jakub Kicinski Reviewed-by: Maxim Mikityanskiy Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20220714070754.1428-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/tls.h | 4 ++-- net/tls/tls_device.c | 4 ++-- net/tls/tls_main.c | 7 ++++++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 1fffb206f09f51..bf3d63a5278850 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -707,7 +707,7 @@ int tls_sw_fallback_init(struct sock *sk, struct tls_crypto_info *crypto_info); #ifdef CONFIG_TLS_DEVICE -void tls_device_init(void); +int tls_device_init(void); void tls_device_cleanup(void); void tls_device_sk_destruct(struct sock *sk); int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); @@ -727,7 +727,7 @@ static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk) return tls_get_ctx(sk)->rx_conf == TLS_HW; } #else -static inline void tls_device_init(void) {} +static inline int tls_device_init(void) { return 0; } static inline void tls_device_cleanup(void) {} static inline int diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index f3e3d009cf1cfd..4775431cbd3866 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1394,9 +1394,9 @@ static struct notifier_block tls_dev_notifier = { .notifier_call = tls_dev_event, }; -void __init tls_device_init(void) +int __init tls_device_init(void) { - register_netdevice_notifier(&tls_dev_notifier); + return register_netdevice_notifier(&tls_dev_notifier); } void __exit tls_device_cleanup(void) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 62b1c5e32bbd73..a947cfb100bda0 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -910,7 +910,12 @@ static int __init tls_register(void) if (err) return err; - tls_device_init(); + err = tls_device_init(); + if (err) { + unregister_pernet_subsys(&tls_proc_ops); + return err; + } + tcp_register_ulp(&tcp_tls_ulp_ops); return 0; From 204d12228697942609233fbee635e0449dd6cb1a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 13 Jul 2022 23:11:01 +0200 Subject: [PATCH 0602/1056] ACPI: video: Fix acpi_video_handles_brightness_key_presses() [ Upstream commit 5ad26161a371e4aa2d2553286f0cac580987a493 ] Commit 3a0cf7ab8df3 ("ACPI: video: Change how we determine if brightness key-presses are handled") made acpi_video_handles_brightness_key_presses() report false when none of the ACPI Video Devices support backlight control. But it turns out that at least on a Dell Inspiron N4010 there is no ACPI backlight control, yet brightness hotkeys are still reported through the ACPI Video Bus; and since acpi_video_handles_brightness_key_presses() now returns false, brightness keypresses are now reported twice. To fix this rename the has_backlight flag to may_report_brightness_keys and also set it the first time a brightness key press event is received. Depending on the delivery of the other ACPI (WMI) event vs the ACPI Video Bus event this means that the first brightness key press might still get reported twice, but all further keypresses will be filtered as before. Note that this relies on other drivers reporting brightness key events calling acpi_video_handles_brightness_key_presses() when delivering the events (rather then once during driver probe). This is already required and documented in include/acpi/video.h: /* * Note: The value returned by acpi_video_handles_brightness_key_presses() * may change over time and should not be cached. */ Fixes: 3a0cf7ab8df3 ("ACPI: video: Change how we determine if brightness key-presses are handled") Link: https://lore.kernel.org/regressions/CALF=6jEe5G8+r1Wo0vvz4GjNQQhdkLT5p8uCHn6ZXhg4nsOWow@mail.gmail.com/ Reported-and-tested-by: Ben Greening Signed-off-by: Hans de Goede Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20220713211101.85547-2-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/acpi/acpi_video.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 007deb3a8ea37a..390af28f6fafea 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -73,7 +73,7 @@ module_param(device_id_scheme, bool, 0444); static int only_lcd = -1; module_param(only_lcd, int, 0444); -static bool has_backlight; +static bool may_report_brightness_keys; static int register_count; static DEFINE_MUTEX(register_count_mutex); static DEFINE_MUTEX(video_list_lock); @@ -1224,7 +1224,7 @@ acpi_video_bus_get_one_device(struct acpi_device *device, acpi_video_device_find_cap(data); if (data->cap._BCM && data->cap._BCL) - has_backlight = true; + may_report_brightness_keys = true; mutex_lock(&video->device_list_lock); list_add_tail(&data->entry, &video->video_device_list); @@ -1693,6 +1693,9 @@ static void acpi_video_device_notify(acpi_handle handle, u32 event, void *data) break; } + if (keycode) + may_report_brightness_keys = true; + acpi_notifier_call_chain(device, event, 0); if (keycode && (report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS)) { @@ -2255,7 +2258,7 @@ void acpi_video_unregister(void) if (register_count) { acpi_bus_unregister_driver(&acpi_video_bus); register_count = 0; - has_backlight = false; + may_report_brightness_keys = false; } mutex_unlock(®ister_count_mutex); } @@ -2277,7 +2280,7 @@ void acpi_video_unregister_backlight(void) bool acpi_video_handles_brightness_key_presses(void) { - return has_backlight && + return may_report_brightness_keys && (report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS); } EXPORT_SYMBOL(acpi_video_handles_brightness_key_presses); From a06248fa62ef584daacaeb0aa12f8009ad297492 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 9 Jun 2022 18:40:32 +0800 Subject: [PATCH 0603/1056] mm: sysctl: fix missing numa_stat when !CONFIG_HUGETLB_PAGE [ Upstream commit 43b5240ca6b33108998810593248186b1e3ae34a ] "numa_stat" should not be included in the scope of CONFIG_HUGETLB_PAGE, if CONFIG_HUGETLB_PAGE is not configured even if CONFIG_NUMA is configured, "numa_stat" is missed form /proc. Move it out of CONFIG_HUGETLB_PAGE to fix it. Fixes: 4518085e127d ("mm, sysctl: make NUMA stats configurable") Signed-off-by: Muchun Song Cc: Acked-by: Michal Hocko Acked-by: Mel Gorman Signed-off-by: Luis Chamberlain Signed-off-by: Sasha Levin --- kernel/sysctl.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 79cbfd0fa3be92..25c18b2df68486 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2852,6 +2852,17 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = &two_hundred, }, +#ifdef CONFIG_NUMA + { + .procname = "numa_stat", + .data = &sysctl_vm_numa_stat, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sysctl_vm_numa_stat_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif #ifdef CONFIG_HUGETLB_PAGE { .procname = "nr_hugepages", @@ -2868,15 +2879,6 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &hugetlb_mempolicy_sysctl_handler, }, - { - .procname = "numa_stat", - .data = &sysctl_vm_numa_stat, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = sysctl_vm_numa_stat_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, #endif { .procname = "hugetlb_shm_group", From d2faf8ed1dc28b709cde51317ee28577082679d2 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 15 Sep 2021 15:17:16 +0800 Subject: [PATCH 0604/1056] btrfs: rename btrfs_bio to btrfs_io_context [ Upstream commit 4c6646117912397d026d70c04d92ec1599522e9f ] The structure btrfs_bio is used by two different sites: - bio->bi_private for mirror based profiles For those profiles (SINGLE/DUP/RAID1*/RAID10), this structures records how many mirrors are still pending, and save the original endio function of the bio. - RAID56 code In that case, RAID56 only utilize the stripes info, and no long uses that to trace the pending mirrors. So btrfs_bio is not always bind to a bio, and contains more info for IO context, thus renaming it will make the naming less confusing. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/check-integrity.c | 2 +- fs/btrfs/extent-tree.c | 19 ++- fs/btrfs/extent_io.c | 18 +-- fs/btrfs/extent_map.c | 4 +- fs/btrfs/raid56.c | 127 +++++++++--------- fs/btrfs/raid56.h | 8 +- fs/btrfs/reada.c | 26 ++-- fs/btrfs/scrub.c | 115 ++++++++-------- fs/btrfs/volumes.c | 267 ++++++++++++++++++------------------- fs/btrfs/volumes.h | 38 ++++-- fs/btrfs/zoned.c | 16 +-- 11 files changed, 325 insertions(+), 315 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 86816088927f15..81b11124b67a87 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1455,7 +1455,7 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, struct btrfs_fs_info *fs_info = state->fs_info; int ret; u64 length; - struct btrfs_bio *multi = NULL; + struct btrfs_io_context *multi = NULL; struct btrfs_device *device; length = len; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f11616f61dd62c..e3514f9a4e8ddb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1266,7 +1266,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, return ret; } -static int do_discard_extent(struct btrfs_bio_stripe *stripe, u64 *bytes) +static int do_discard_extent(struct btrfs_io_stripe *stripe, u64 *bytes) { struct btrfs_device *dev = stripe->dev; struct btrfs_fs_info *fs_info = dev->fs_info; @@ -1313,22 +1313,21 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, u64 discarded_bytes = 0; u64 end = bytenr + num_bytes; u64 cur = bytenr; - struct btrfs_bio *bbio = NULL; - + struct btrfs_io_context *bioc = NULL; /* - * Avoid races with device replace and make sure our bbio has devices + * Avoid races with device replace and make sure our bioc has devices * associated to its stripes that don't go away while we are discarding. */ btrfs_bio_counter_inc_blocked(fs_info); while (cur < end) { - struct btrfs_bio_stripe *stripe; + struct btrfs_io_stripe *stripe; int i; num_bytes = end - cur; /* Tell the block device(s) that the sectors can be discarded */ ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur, - &num_bytes, &bbio, 0); + &num_bytes, &bioc, 0); /* * Error can be -ENOMEM, -ENOENT (no such chunk mapping) or * -EOPNOTSUPP. For any such error, @num_bytes is not updated, @@ -1337,8 +1336,8 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, if (ret < 0) goto out; - stripe = bbio->stripes; - for (i = 0; i < bbio->num_stripes; i++, stripe++) { + stripe = bioc->stripes; + for (i = 0; i < bioc->num_stripes; i++, stripe++) { u64 bytes; struct btrfs_device *device = stripe->dev; @@ -1361,7 +1360,7 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, * And since there are two loops, explicitly * go to out to avoid confusion. */ - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); goto out; } @@ -1372,7 +1371,7 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, */ ret = 0; } - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); cur += num_bytes; } out: diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 059bd0753e27f6..b791e280af0ca0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2290,7 +2290,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, struct btrfs_device *dev; u64 map_length = 0; u64 sector; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; int ret; ASSERT(!(fs_info->sb->s_flags & SB_RDONLY)); @@ -2304,7 +2304,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, map_length = length; /* - * Avoid races with device replace and make sure our bbio has devices + * Avoid races with device replace and make sure our bioc has devices * associated to its stripes that don't go away while we are doing the * read repair operation. */ @@ -2317,28 +2317,28 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, * stripe's dev and sector. */ ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical, - &map_length, &bbio, 0); + &map_length, &bioc, 0); if (ret) { btrfs_bio_counter_dec(fs_info); bio_put(bio); return -EIO; } - ASSERT(bbio->mirror_num == 1); + ASSERT(bioc->mirror_num == 1); } else { ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, - &map_length, &bbio, mirror_num); + &map_length, &bioc, mirror_num); if (ret) { btrfs_bio_counter_dec(fs_info); bio_put(bio); return -EIO; } - BUG_ON(mirror_num != bbio->mirror_num); + BUG_ON(mirror_num != bioc->mirror_num); } - sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9; + sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9; bio->bi_iter.bi_sector = sector; - dev = bbio->stripes[bbio->mirror_num - 1].dev; - btrfs_put_bbio(bbio); + dev = bioc->stripes[bioc->mirror_num - 1].dev; + btrfs_put_bioc(bioc); if (!dev || !dev->bdev || !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { btrfs_bio_counter_dec(fs_info); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 4a8e02f7b6c7a1..5a36add213053f 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -360,7 +360,7 @@ static void extent_map_device_set_bits(struct extent_map *em, unsigned bits) int i; for (i = 0; i < map->num_stripes; i++) { - struct btrfs_bio_stripe *stripe = &map->stripes[i]; + struct btrfs_io_stripe *stripe = &map->stripes[i]; struct btrfs_device *device = stripe->dev; set_extent_bits_nowait(&device->alloc_state, stripe->physical, @@ -375,7 +375,7 @@ static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits) int i; for (i = 0; i < map->num_stripes; i++) { - struct btrfs_bio_stripe *stripe = &map->stripes[i]; + struct btrfs_io_stripe *stripe = &map->stripes[i]; struct btrfs_device *device = stripe->dev; __clear_extent_bit(&device->alloc_state, stripe->physical, diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index d8d268ca8aa76e..893d93e3c516da 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -61,7 +61,7 @@ enum btrfs_rbio_ops { struct btrfs_raid_bio { struct btrfs_fs_info *fs_info; - struct btrfs_bio *bbio; + struct btrfs_io_context *bioc; /* while we're doing rmw on a stripe * we put it into a hash table so we can @@ -271,7 +271,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio) */ static int rbio_bucket(struct btrfs_raid_bio *rbio) { - u64 num = rbio->bbio->raid_map[0]; + u64 num = rbio->bioc->raid_map[0]; /* * we shift down quite a bit. We're using byte @@ -559,8 +559,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last, test_bit(RBIO_CACHE_BIT, &cur->flags)) return 0; - if (last->bbio->raid_map[0] != - cur->bbio->raid_map[0]) + if (last->bioc->raid_map[0] != cur->bioc->raid_map[0]) return 0; /* we can't merge with different operations */ @@ -673,7 +672,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) spin_lock_irqsave(&h->lock, flags); list_for_each_entry(cur, &h->hash_list, hash_list) { - if (cur->bbio->raid_map[0] != rbio->bbio->raid_map[0]) + if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0]) continue; spin_lock(&cur->bio_list_lock); @@ -838,7 +837,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio) } } - btrfs_put_bbio(rbio->bbio); + btrfs_put_bioc(rbio->bioc); kfree(rbio); } @@ -906,7 +905,7 @@ static void raid_write_end_io(struct bio *bio) /* OK, we have read all the stripes we need to. */ max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ? - 0 : rbio->bbio->max_errors; + 0 : rbio->bioc->max_errors; if (atomic_read(&rbio->error) > max_errors) err = BLK_STS_IOERR; @@ -961,12 +960,12 @@ static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes) * this does not allocate any pages for rbio->pages. */ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info, - struct btrfs_bio *bbio, + struct btrfs_io_context *bioc, u64 stripe_len) { struct btrfs_raid_bio *rbio; int nr_data = 0; - int real_stripes = bbio->num_stripes - bbio->num_tgtdevs; + int real_stripes = bioc->num_stripes - bioc->num_tgtdevs; int num_pages = rbio_nr_pages(stripe_len, real_stripes); int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE); void *p; @@ -987,7 +986,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info, spin_lock_init(&rbio->bio_list_lock); INIT_LIST_HEAD(&rbio->stripe_cache); INIT_LIST_HEAD(&rbio->hash_list); - rbio->bbio = bbio; + rbio->bioc = bioc; rbio->fs_info = fs_info; rbio->stripe_len = stripe_len; rbio->nr_pages = num_pages; @@ -1015,9 +1014,9 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info, CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages)); #undef CONSUME_ALLOC - if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5) + if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5) nr_data = real_stripes - 1; - else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) + else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) nr_data = real_stripes - 2; else BUG(); @@ -1077,10 +1076,10 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, struct bio *last = bio_list->tail; int ret; struct bio *bio; - struct btrfs_bio_stripe *stripe; + struct btrfs_io_stripe *stripe; u64 disk_start; - stripe = &rbio->bbio->stripes[stripe_nr]; + stripe = &rbio->bioc->stripes[stripe_nr]; disk_start = stripe->physical + (page_index << PAGE_SHIFT); /* if the device is missing, just fail this stripe */ @@ -1155,7 +1154,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio) int i = 0; start = bio->bi_iter.bi_sector << 9; - stripe_offset = start - rbio->bbio->raid_map[0]; + stripe_offset = start - rbio->bioc->raid_map[0]; page_index = stripe_offset >> PAGE_SHIFT; if (bio_flagged(bio, BIO_CLONED)) @@ -1179,7 +1178,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio) */ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) { - struct btrfs_bio *bbio = rbio->bbio; + struct btrfs_io_context *bioc = rbio->bioc; void **pointers = rbio->finish_pointers; int nr_data = rbio->nr_data; int stripe; @@ -1284,11 +1283,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) } } - if (likely(!bbio->num_tgtdevs)) + if (likely(!bioc->num_tgtdevs)) goto write_data; for (stripe = 0; stripe < rbio->real_stripes; stripe++) { - if (!bbio->tgtdev_map[stripe]) + if (!bioc->tgtdev_map[stripe]) continue; for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { @@ -1302,7 +1301,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) } ret = rbio_add_io_page(rbio, &bio_list, page, - rbio->bbio->tgtdev_map[stripe], + rbio->bioc->tgtdev_map[stripe], pagenr, rbio->stripe_len); if (ret) goto cleanup; @@ -1339,12 +1338,12 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio, { u64 physical = bio->bi_iter.bi_sector; int i; - struct btrfs_bio_stripe *stripe; + struct btrfs_io_stripe *stripe; physical <<= 9; - for (i = 0; i < rbio->bbio->num_stripes; i++) { - stripe = &rbio->bbio->stripes[i]; + for (i = 0; i < rbio->bioc->num_stripes; i++) { + stripe = &rbio->bioc->stripes[i]; if (in_range(physical, stripe->physical, rbio->stripe_len) && stripe->dev->bdev && bio->bi_bdev == stripe->dev->bdev) { return i; @@ -1365,7 +1364,7 @@ static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio, int i; for (i = 0; i < rbio->nr_data; i++) { - u64 stripe_start = rbio->bbio->raid_map[i]; + u64 stripe_start = rbio->bioc->raid_map[i]; if (in_range(logical, stripe_start, rbio->stripe_len)) return i; @@ -1456,7 +1455,7 @@ static void raid_rmw_end_io(struct bio *bio) if (!atomic_dec_and_test(&rbio->stripes_pending)) return; - if (atomic_read(&rbio->error) > rbio->bbio->max_errors) + if (atomic_read(&rbio->error) > rbio->bioc->max_errors) goto cleanup; /* @@ -1538,8 +1537,8 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) } /* - * the bbio may be freed once we submit the last bio. Make sure - * not to touch it after that + * The bioc may be freed once we submit the last bio. Make sure not to + * touch it after that. */ atomic_set(&rbio->stripes_pending, bios_to_read); while ((bio = bio_list_pop(&bio_list))) { @@ -1720,16 +1719,16 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) * our main entry point for writes from the rest of the FS. */ int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 stripe_len) + struct btrfs_io_context *bioc, u64 stripe_len) { struct btrfs_raid_bio *rbio; struct btrfs_plug_cb *plug = NULL; struct blk_plug_cb *cb; int ret; - rbio = alloc_rbio(fs_info, bbio, stripe_len); + rbio = alloc_rbio(fs_info, bioc, stripe_len); if (IS_ERR(rbio)) { - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); return PTR_ERR(rbio); } bio_list_add(&rbio->bio_list, bio); @@ -1842,7 +1841,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) } /* all raid6 handling here */ - if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) { + if (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) { /* * single failure, rebuild from parity raid5 * style @@ -1874,8 +1873,8 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) * here due to a crc mismatch and we can't give them the * data they want */ - if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) { - if (rbio->bbio->raid_map[faila] == + if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) { + if (rbio->bioc->raid_map[faila] == RAID5_P_STRIPE) { err = BLK_STS_IOERR; goto cleanup; @@ -1887,7 +1886,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) goto pstripe; } - if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) { + if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) { raid6_datap_recov(rbio->real_stripes, PAGE_SIZE, faila, pointers); } else { @@ -2006,7 +2005,7 @@ static void raid_recover_end_io(struct bio *bio) if (!atomic_dec_and_test(&rbio->stripes_pending)) return; - if (atomic_read(&rbio->error) > rbio->bbio->max_errors) + if (atomic_read(&rbio->error) > rbio->bioc->max_errors) rbio_orig_end_io(rbio, BLK_STS_IOERR); else __raid_recover_end_io(rbio); @@ -2074,7 +2073,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) * were up to date, or we might have no bios to read because * the devices were gone. */ - if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) { + if (atomic_read(&rbio->error) <= rbio->bioc->max_errors) { __raid_recover_end_io(rbio); return 0; } else { @@ -2083,8 +2082,8 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) } /* - * the bbio may be freed once we submit the last bio. Make sure - * not to touch it after that + * The bioc may be freed once we submit the last bio. Make sure not to + * touch it after that. */ atomic_set(&rbio->stripes_pending, bios_to_read); while ((bio = bio_list_pop(&bio_list))) { @@ -2117,21 +2116,21 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) * of the drive. */ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 stripe_len, + struct btrfs_io_context *bioc, u64 stripe_len, int mirror_num, int generic_io) { struct btrfs_raid_bio *rbio; int ret; if (generic_io) { - ASSERT(bbio->mirror_num == mirror_num); + ASSERT(bioc->mirror_num == mirror_num); btrfs_io_bio(bio)->mirror_num = mirror_num; } - rbio = alloc_rbio(fs_info, bbio, stripe_len); + rbio = alloc_rbio(fs_info, bioc, stripe_len); if (IS_ERR(rbio)) { if (generic_io) - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); return PTR_ERR(rbio); } @@ -2142,11 +2141,11 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, rbio->faila = find_logical_bio_stripe(rbio, bio); if (rbio->faila == -1) { btrfs_warn(fs_info, - "%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bbio has map_type %llu)", +"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bioc has map_type %llu)", __func__, bio->bi_iter.bi_sector << 9, - (u64)bio->bi_iter.bi_size, bbio->map_type); + (u64)bio->bi_iter.bi_size, bioc->map_type); if (generic_io) - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); kfree(rbio); return -EIO; } @@ -2155,7 +2154,7 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, btrfs_bio_counter_inc_noblocked(fs_info); rbio->generic_bio_cnt = 1; } else { - btrfs_get_bbio(bbio); + btrfs_get_bioc(bioc); } /* @@ -2214,7 +2213,7 @@ static void read_rebuild_work(struct btrfs_work *work) /* * The following code is used to scrub/replace the parity stripe * - * Caller must have already increased bio_counter for getting @bbio. + * Caller must have already increased bio_counter for getting @bioc. * * Note: We need make sure all the pages that add into the scrub/replace * raid bio are correct and not be changed during the scrub/replace. That @@ -2223,14 +2222,14 @@ static void read_rebuild_work(struct btrfs_work *work) struct btrfs_raid_bio * raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 stripe_len, + struct btrfs_io_context *bioc, u64 stripe_len, struct btrfs_device *scrub_dev, unsigned long *dbitmap, int stripe_nsectors) { struct btrfs_raid_bio *rbio; int i; - rbio = alloc_rbio(fs_info, bbio, stripe_len); + rbio = alloc_rbio(fs_info, bioc, stripe_len); if (IS_ERR(rbio)) return NULL; bio_list_add(&rbio->bio_list, bio); @@ -2242,12 +2241,12 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, rbio->operation = BTRFS_RBIO_PARITY_SCRUB; /* - * After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted + * After mapping bioc with BTRFS_MAP_WRITE, parities have been sorted * to the end position, so this search can start from the first parity * stripe. */ for (i = rbio->nr_data; i < rbio->real_stripes; i++) { - if (bbio->stripes[i].dev == scrub_dev) { + if (bioc->stripes[i].dev == scrub_dev) { rbio->scrubp = i; break; } @@ -2260,7 +2259,7 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors); /* - * We have already increased bio_counter when getting bbio, record it + * We have already increased bio_counter when getting bioc, record it * so we can free it at rbio_orig_end_io(). */ rbio->generic_bio_cnt = 1; @@ -2275,10 +2274,10 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page, int stripe_offset; int index; - ASSERT(logical >= rbio->bbio->raid_map[0]); - ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] + + ASSERT(logical >= rbio->bioc->raid_map[0]); + ASSERT(logical + PAGE_SIZE <= rbio->bioc->raid_map[0] + rbio->stripe_len * rbio->nr_data); - stripe_offset = (int)(logical - rbio->bbio->raid_map[0]); + stripe_offset = (int)(logical - rbio->bioc->raid_map[0]); index = stripe_offset >> PAGE_SHIFT; rbio->bio_pages[index] = page; } @@ -2312,7 +2311,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) { - struct btrfs_bio *bbio = rbio->bbio; + struct btrfs_io_context *bioc = rbio->bioc; void **pointers = rbio->finish_pointers; unsigned long *pbitmap = rbio->finish_pbitmap; int nr_data = rbio->nr_data; @@ -2335,7 +2334,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, else BUG(); - if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) { + if (bioc->num_tgtdevs && bioc->tgtdev_map[rbio->scrubp]) { is_replace = 1; bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages); } @@ -2435,7 +2434,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, page = rbio_stripe_page(rbio, rbio->scrubp, pagenr); ret = rbio_add_io_page(rbio, &bio_list, page, - bbio->tgtdev_map[rbio->scrubp], + bioc->tgtdev_map[rbio->scrubp], pagenr, rbio->stripe_len); if (ret) goto cleanup; @@ -2483,7 +2482,7 @@ static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) */ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio) { - if (atomic_read(&rbio->error) > rbio->bbio->max_errors) + if (atomic_read(&rbio->error) > rbio->bioc->max_errors) goto cleanup; if (rbio->faila >= 0 || rbio->failb >= 0) { @@ -2504,7 +2503,7 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio) * the data, so the capability of the repair is declined. * (In the case of RAID5, we can not repair anything) */ - if (dfail > rbio->bbio->max_errors - 1) + if (dfail > rbio->bioc->max_errors - 1) goto cleanup; /* @@ -2625,8 +2624,8 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) } /* - * the bbio may be freed once we submit the last bio. Make sure - * not to touch it after that + * The bioc may be freed once we submit the last bio. Make sure not to + * touch it after that. */ atomic_set(&rbio->stripes_pending, bios_to_read); while ((bio = bio_list_pop(&bio_list))) { @@ -2671,11 +2670,11 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) struct btrfs_raid_bio * raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 length) + struct btrfs_io_context *bioc, u64 length) { struct btrfs_raid_bio *rbio; - rbio = alloc_rbio(fs_info, bbio, length); + rbio = alloc_rbio(fs_info, bioc, length); if (IS_ERR(rbio)) return NULL; @@ -2695,7 +2694,7 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, } /* - * When we get bbio, we have already increased bio_counter, record it + * When we get bioc, we have already increased bio_counter, record it * so we can free it at rbio_orig_end_io() */ rbio->generic_bio_cnt = 1; diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index 2503485db859b2..838d3a5e07ef4d 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h @@ -31,24 +31,24 @@ struct btrfs_raid_bio; struct btrfs_device; int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 stripe_len, + struct btrfs_io_context *bioc, u64 stripe_len, int mirror_num, int generic_io); int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 stripe_len); + struct btrfs_io_context *bioc, u64 stripe_len); void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page, u64 logical); struct btrfs_raid_bio * raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 stripe_len, + struct btrfs_io_context *bioc, u64 stripe_len, struct btrfs_device *scrub_dev, unsigned long *dbitmap, int stripe_nsectors); void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio); struct btrfs_raid_bio * raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 length); + struct btrfs_io_context *bioc, u64 length); void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio); int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 06713a8fe26b4a..eb96fdc3be25f8 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -227,7 +227,7 @@ int btree_readahead_hook(struct extent_buffer *eb, int err) } static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical, - struct btrfs_bio *bbio) + struct btrfs_io_context *bioc) { struct btrfs_fs_info *fs_info = dev->fs_info; int ret; @@ -275,11 +275,11 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical, kref_init(&zone->refcnt); zone->elems = 0; zone->device = dev; /* our device always sits at index 0 */ - for (i = 0; i < bbio->num_stripes; ++i) { + for (i = 0; i < bioc->num_stripes; ++i) { /* bounds have already been checked */ - zone->devs[i] = bbio->stripes[i].dev; + zone->devs[i] = bioc->stripes[i].dev; } - zone->ndevs = bbio->num_stripes; + zone->ndevs = bioc->num_stripes; spin_lock(&fs_info->reada_lock); ret = radix_tree_insert(&dev->reada_zones, @@ -309,7 +309,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, int ret; struct reada_extent *re = NULL; struct reada_extent *re_exist = NULL; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; struct btrfs_device *dev; struct btrfs_device *prev_dev; u64 length; @@ -345,28 +345,28 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, */ length = fs_info->nodesize; ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, - &length, &bbio, 0); - if (ret || !bbio || length < fs_info->nodesize) + &length, &bioc, 0); + if (ret || !bioc || length < fs_info->nodesize) goto error; - if (bbio->num_stripes > BTRFS_MAX_MIRRORS) { + if (bioc->num_stripes > BTRFS_MAX_MIRRORS) { btrfs_err(fs_info, "readahead: more than %d copies not supported", BTRFS_MAX_MIRRORS); goto error; } - real_stripes = bbio->num_stripes - bbio->num_tgtdevs; + real_stripes = bioc->num_stripes - bioc->num_tgtdevs; for (nzones = 0; nzones < real_stripes; ++nzones) { struct reada_zone *zone; - dev = bbio->stripes[nzones].dev; + dev = bioc->stripes[nzones].dev; /* cannot read ahead on missing device. */ if (!dev->bdev) continue; - zone = reada_find_zone(dev, logical, bbio); + zone = reada_find_zone(dev, logical, bioc); if (!zone) continue; @@ -464,7 +464,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, if (!have_zone) goto error; - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); return re; error: @@ -488,7 +488,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, kref_put(&zone->refcnt, reada_zone_release); spin_unlock(&fs_info->reada_lock); } - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); kfree(re); return re_exist; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 6f2787b21530ff..0785d9d645fc36 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -57,7 +57,7 @@ struct scrub_ctx; struct scrub_recover { refcount_t refs; - struct btrfs_bio *bbio; + struct btrfs_io_context *bioc; u64 map_length; }; @@ -254,7 +254,7 @@ static void scrub_put_ctx(struct scrub_ctx *sctx); static inline int scrub_is_page_on_raid56(struct scrub_page *spage) { return spage->recover && - (spage->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK); + (spage->recover->bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK); } static void scrub_pending_bio_inc(struct scrub_ctx *sctx) @@ -798,7 +798,7 @@ static inline void scrub_put_recover(struct btrfs_fs_info *fs_info, { if (refcount_dec_and_test(&recover->refs)) { btrfs_bio_counter_dec(fs_info); - btrfs_put_bbio(recover->bbio); + btrfs_put_bioc(recover->bioc); kfree(recover); } } @@ -1027,8 +1027,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) sblock_other = sblocks_for_recheck + mirror_index; } else { struct scrub_recover *r = sblock_bad->pagev[0]->recover; - int max_allowed = r->bbio->num_stripes - - r->bbio->num_tgtdevs; + int max_allowed = r->bioc->num_stripes - r->bioc->num_tgtdevs; if (mirror_index >= max_allowed) break; @@ -1218,14 +1217,14 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) return 0; } -static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio) +static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc) { - if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5) + if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5) return 2; - else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) + else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) return 3; else - return (int)bbio->num_stripes; + return (int)bioc->num_stripes; } static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type, @@ -1269,7 +1268,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, u64 flags = original_sblock->pagev[0]->flags; u64 have_csum = original_sblock->pagev[0]->have_csum; struct scrub_recover *recover; - struct btrfs_bio *bbio; + struct btrfs_io_context *bioc; u64 sublen; u64 mapped_length; u64 stripe_offset; @@ -1288,7 +1287,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, while (length > 0) { sublen = min_t(u64, length, fs_info->sectorsize); mapped_length = sublen; - bbio = NULL; + bioc = NULL; /* * With a length of sectorsize, each returned stripe represents @@ -1296,27 +1295,27 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, */ btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, - logical, &mapped_length, &bbio); - if (ret || !bbio || mapped_length < sublen) { - btrfs_put_bbio(bbio); + logical, &mapped_length, &bioc); + if (ret || !bioc || mapped_length < sublen) { + btrfs_put_bioc(bioc); btrfs_bio_counter_dec(fs_info); return -EIO; } recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS); if (!recover) { - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); btrfs_bio_counter_dec(fs_info); return -ENOMEM; } refcount_set(&recover->refs, 1); - recover->bbio = bbio; + recover->bioc = bioc; recover->map_length = mapped_length; BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK); - nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS); + nmirrors = min(scrub_nr_raid_mirrors(bioc), BTRFS_MAX_MIRRORS); for (mirror_index = 0; mirror_index < nmirrors; mirror_index++) { @@ -1348,17 +1347,17 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, sctx->fs_info->csum_size); scrub_stripe_index_and_offset(logical, - bbio->map_type, - bbio->raid_map, + bioc->map_type, + bioc->raid_map, mapped_length, - bbio->num_stripes - - bbio->num_tgtdevs, + bioc->num_stripes - + bioc->num_tgtdevs, mirror_index, &stripe_index, &stripe_offset); - spage->physical = bbio->stripes[stripe_index].physical + + spage->physical = bioc->stripes[stripe_index].physical + stripe_offset; - spage->dev = bbio->stripes[stripe_index].dev; + spage->dev = bioc->stripes[stripe_index].dev; BUG_ON(page_index >= original_sblock->page_count); spage->physical_for_dev_replace = @@ -1401,7 +1400,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, bio->bi_end_io = scrub_bio_wait_endio; mirror_num = spage->sblock->pagev[0]->mirror_num; - ret = raid56_parity_recover(fs_info, bio, spage->recover->bbio, + ret = raid56_parity_recover(fs_info, bio, spage->recover->bioc, spage->recover->map_length, mirror_num, 0); if (ret) @@ -2203,7 +2202,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) struct btrfs_fs_info *fs_info = sctx->fs_info; u64 length = sblock->page_count * PAGE_SIZE; u64 logical = sblock->pagev[0]->logical; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; struct bio *bio; struct btrfs_raid_bio *rbio; int ret; @@ -2211,19 +2210,19 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, - &length, &bbio); - if (ret || !bbio || !bbio->raid_map) - goto bbio_out; + &length, &bioc); + if (ret || !bioc || !bioc->raid_map) + goto bioc_out; if (WARN_ON(!sctx->is_dev_replace || - !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) { + !(bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) { /* * We shouldn't be scrubbing a missing device. Even for dev * replace, we should only get here for RAID 5/6. We either * managed to mount something with no mirrors remaining or * there's a bug in scrub_remap_extent()/btrfs_map_block(). */ - goto bbio_out; + goto bioc_out; } bio = btrfs_io_bio_alloc(0); @@ -2231,7 +2230,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) bio->bi_private = sblock; bio->bi_end_io = scrub_missing_raid56_end_io; - rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length); + rbio = raid56_alloc_missing_rbio(fs_info, bio, bioc, length); if (!rbio) goto rbio_out; @@ -2249,9 +2248,9 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) rbio_out: bio_put(bio); -bbio_out: +bioc_out: btrfs_bio_counter_dec(fs_info); - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; spin_unlock(&sctx->stat_lock); @@ -2826,7 +2825,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) struct btrfs_fs_info *fs_info = sctx->fs_info; struct bio *bio; struct btrfs_raid_bio *rbio; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; u64 length; int ret; @@ -2838,16 +2837,16 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start, - &length, &bbio); - if (ret || !bbio || !bbio->raid_map) - goto bbio_out; + &length, &bioc); + if (ret || !bioc || !bioc->raid_map) + goto bioc_out; bio = btrfs_io_bio_alloc(0); bio->bi_iter.bi_sector = sparity->logic_start >> 9; bio->bi_private = sparity; bio->bi_end_io = scrub_parity_bio_endio; - rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio, + rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bioc, length, sparity->scrub_dev, sparity->dbitmap, sparity->nsectors); @@ -2860,9 +2859,9 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) rbio_out: bio_put(bio); -bbio_out: +bioc_out: btrfs_bio_counter_dec(fs_info); - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, sparity->nsectors); spin_lock(&sctx->stat_lock); @@ -2901,7 +2900,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, struct btrfs_root *root = fs_info->extent_root; struct btrfs_root *csum_root = fs_info->csum_root; struct btrfs_extent_item *extent; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; u64 flags; int ret; int slot; @@ -3044,22 +3043,22 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, extent_len); mapped_length = extent_len; - bbio = NULL; + bioc = NULL; ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, - extent_logical, &mapped_length, &bbio, + extent_logical, &mapped_length, &bioc, 0); if (!ret) { - if (!bbio || mapped_length < extent_len) + if (!bioc || mapped_length < extent_len) ret = -EIO; } if (ret) { - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); goto out; } - extent_physical = bbio->stripes[0].physical; - extent_mirror_num = bbio->mirror_num; - extent_dev = bbio->stripes[0].dev; - btrfs_put_bbio(bbio); + extent_physical = bioc->stripes[0].physical; + extent_mirror_num = bioc->mirror_num; + extent_dev = bioc->stripes[0].dev; + btrfs_put_bioc(bioc); ret = btrfs_lookup_csums_range(csum_root, extent_logical, @@ -4311,20 +4310,20 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info, int *extent_mirror_num) { u64 mapped_length; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; int ret; mapped_length = extent_len; ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical, - &mapped_length, &bbio, 0); - if (ret || !bbio || mapped_length < extent_len || - !bbio->stripes[0].dev->bdev) { - btrfs_put_bbio(bbio); + &mapped_length, &bioc, 0); + if (ret || !bioc || mapped_length < extent_len || + !bioc->stripes[0].dev->bdev) { + btrfs_put_bioc(bioc); return; } - *extent_physical = bbio->stripes[0].physical; - *extent_mirror_num = bbio->mirror_num; - *extent_dev = bbio->stripes[0].dev; - btrfs_put_bbio(bbio); + *extent_physical = bioc->stripes[0].physical; + *extent_mirror_num = bioc->mirror_num; + *extent_dev = bioc->stripes[0].dev; + btrfs_put_bioc(bioc); } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 89ce0b449c22a1..2a93d80be9bf7b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -251,7 +251,7 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); static int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, - struct btrfs_bio **bbio_ret, + struct btrfs_io_context **bioc_ret, int mirror_num, int need_raid_map); /* @@ -5868,7 +5868,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, } /* Bubble-sort the stripe set to put the parity/syndrome stripes last */ -static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes) +static void sort_parity_stripes(struct btrfs_io_context *bioc, int num_stripes) { int i; int again = 1; @@ -5877,52 +5877,53 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes) again = 0; for (i = 0; i < num_stripes - 1; i++) { /* Swap if parity is on a smaller index */ - if (bbio->raid_map[i] > bbio->raid_map[i + 1]) { - swap(bbio->stripes[i], bbio->stripes[i + 1]); - swap(bbio->raid_map[i], bbio->raid_map[i + 1]); + if (bioc->raid_map[i] > bioc->raid_map[i + 1]) { + swap(bioc->stripes[i], bioc->stripes[i + 1]); + swap(bioc->raid_map[i], bioc->raid_map[i + 1]); again = 1; } } } } -static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes) +static struct btrfs_io_context *alloc_btrfs_io_context(int total_stripes, + int real_stripes) { - struct btrfs_bio *bbio = kzalloc( - /* the size of the btrfs_bio */ - sizeof(struct btrfs_bio) + - /* plus the variable array for the stripes */ - sizeof(struct btrfs_bio_stripe) * (total_stripes) + - /* plus the variable array for the tgt dev */ + struct btrfs_io_context *bioc = kzalloc( + /* The size of btrfs_io_context */ + sizeof(struct btrfs_io_context) + + /* Plus the variable array for the stripes */ + sizeof(struct btrfs_io_stripe) * (total_stripes) + + /* Plus the variable array for the tgt dev */ sizeof(int) * (real_stripes) + /* - * plus the raid_map, which includes both the tgt dev - * and the stripes + * Plus the raid_map, which includes both the tgt dev + * and the stripes. */ sizeof(u64) * (total_stripes), GFP_NOFS|__GFP_NOFAIL); - atomic_set(&bbio->error, 0); - refcount_set(&bbio->refs, 1); + atomic_set(&bioc->error, 0); + refcount_set(&bioc->refs, 1); - bbio->tgtdev_map = (int *)(bbio->stripes + total_stripes); - bbio->raid_map = (u64 *)(bbio->tgtdev_map + real_stripes); + bioc->tgtdev_map = (int *)(bioc->stripes + total_stripes); + bioc->raid_map = (u64 *)(bioc->tgtdev_map + real_stripes); - return bbio; + return bioc; } -void btrfs_get_bbio(struct btrfs_bio *bbio) +void btrfs_get_bioc(struct btrfs_io_context *bioc) { - WARN_ON(!refcount_read(&bbio->refs)); - refcount_inc(&bbio->refs); + WARN_ON(!refcount_read(&bioc->refs)); + refcount_inc(&bioc->refs); } -void btrfs_put_bbio(struct btrfs_bio *bbio) +void btrfs_put_bioc(struct btrfs_io_context *bioc) { - if (!bbio) + if (!bioc) return; - if (refcount_dec_and_test(&bbio->refs)) - kfree(bbio); + if (refcount_dec_and_test(&bioc->refs)) + kfree(bioc); } /* can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */ @@ -5932,11 +5933,11 @@ void btrfs_put_bbio(struct btrfs_bio *bbio) */ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, u64 logical, u64 *length_ret, - struct btrfs_bio **bbio_ret) + struct btrfs_io_context **bioc_ret) { struct extent_map *em; struct map_lookup *map; - struct btrfs_bio *bbio; + struct btrfs_io_context *bioc; u64 length = *length_ret; u64 offset; u64 stripe_nr; @@ -5955,8 +5956,8 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, int ret = 0; int i; - /* discard always return a bbio */ - ASSERT(bbio_ret); + /* Discard always returns a bioc. */ + ASSERT(bioc_ret); em = btrfs_get_chunk_map(fs_info, logical, length); if (IS_ERR(em)) @@ -6019,26 +6020,25 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, &stripe_index); } - bbio = alloc_btrfs_bio(num_stripes, 0); - if (!bbio) { + bioc = alloc_btrfs_io_context(num_stripes, 0); + if (!bioc) { ret = -ENOMEM; goto out; } for (i = 0; i < num_stripes; i++) { - bbio->stripes[i].physical = + bioc->stripes[i].physical = map->stripes[stripe_index].physical + stripe_offset + stripe_nr * map->stripe_len; - bbio->stripes[i].dev = map->stripes[stripe_index].dev; + bioc->stripes[i].dev = map->stripes[stripe_index].dev; if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) { - bbio->stripes[i].length = stripes_per_dev * + bioc->stripes[i].length = stripes_per_dev * map->stripe_len; if (i / sub_stripes < remaining_stripes) - bbio->stripes[i].length += - map->stripe_len; + bioc->stripes[i].length += map->stripe_len; /* * Special for the first stripe and @@ -6049,19 +6049,17 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, * off end_off */ if (i < sub_stripes) - bbio->stripes[i].length -= - stripe_offset; + bioc->stripes[i].length -= stripe_offset; if (stripe_index >= last_stripe && stripe_index <= (last_stripe + sub_stripes - 1)) - bbio->stripes[i].length -= - stripe_end_offset; + bioc->stripes[i].length -= stripe_end_offset; if (i == sub_stripes - 1) stripe_offset = 0; } else { - bbio->stripes[i].length = length; + bioc->stripes[i].length = length; } stripe_index++; @@ -6071,9 +6069,9 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, } } - *bbio_ret = bbio; - bbio->map_type = map->type; - bbio->num_stripes = num_stripes; + *bioc_ret = bioc; + bioc->map_type = map->type; + bioc->num_stripes = num_stripes; out: free_extent_map(em); return ret; @@ -6097,7 +6095,7 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, u64 srcdev_devid, int *mirror_num, u64 *physical) { - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; int num_stripes; int index_srcdev = 0; int found = 0; @@ -6106,20 +6104,20 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, int ret = 0; ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, - logical, &length, &bbio, 0, 0); + logical, &length, &bioc, 0, 0); if (ret) { - ASSERT(bbio == NULL); + ASSERT(bioc == NULL); return ret; } - num_stripes = bbio->num_stripes; + num_stripes = bioc->num_stripes; if (*mirror_num > num_stripes) { /* * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror, * that means that the requested area is not left of the left * cursor */ - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); return -EIO; } @@ -6129,7 +6127,7 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, * pointer to the one of the target drive. */ for (i = 0; i < num_stripes; i++) { - if (bbio->stripes[i].dev->devid != srcdev_devid) + if (bioc->stripes[i].dev->devid != srcdev_devid) continue; /* @@ -6137,15 +6135,15 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, * mirror with the lowest physical address */ if (found && - physical_of_found <= bbio->stripes[i].physical) + physical_of_found <= bioc->stripes[i].physical) continue; index_srcdev = i; found = 1; - physical_of_found = bbio->stripes[i].physical; + physical_of_found = bioc->stripes[i].physical; } - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); ASSERT(found); if (!found) @@ -6176,12 +6174,12 @@ static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical) } static void handle_ops_on_dev_replace(enum btrfs_map_op op, - struct btrfs_bio **bbio_ret, + struct btrfs_io_context **bioc_ret, struct btrfs_dev_replace *dev_replace, u64 logical, int *num_stripes_ret, int *max_errors_ret) { - struct btrfs_bio *bbio = *bbio_ret; + struct btrfs_io_context *bioc = *bioc_ret; u64 srcdev_devid = dev_replace->srcdev->devid; int tgtdev_indexes = 0; int num_stripes = *num_stripes_ret; @@ -6211,17 +6209,17 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, */ index_where_to_add = num_stripes; for (i = 0; i < num_stripes; i++) { - if (bbio->stripes[i].dev->devid == srcdev_devid) { + if (bioc->stripes[i].dev->devid == srcdev_devid) { /* write to new disk, too */ - struct btrfs_bio_stripe *new = - bbio->stripes + index_where_to_add; - struct btrfs_bio_stripe *old = - bbio->stripes + i; + struct btrfs_io_stripe *new = + bioc->stripes + index_where_to_add; + struct btrfs_io_stripe *old = + bioc->stripes + i; new->physical = old->physical; new->length = old->length; new->dev = dev_replace->tgtdev; - bbio->tgtdev_map[i] = index_where_to_add; + bioc->tgtdev_map[i] = index_where_to_add; index_where_to_add++; max_errors++; tgtdev_indexes++; @@ -6241,30 +6239,29 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, * full copy of the source drive. */ for (i = 0; i < num_stripes; i++) { - if (bbio->stripes[i].dev->devid == srcdev_devid) { + if (bioc->stripes[i].dev->devid == srcdev_devid) { /* * In case of DUP, in order to keep it simple, * only add the mirror with the lowest physical * address */ if (found && - physical_of_found <= - bbio->stripes[i].physical) + physical_of_found <= bioc->stripes[i].physical) continue; index_srcdev = i; found = 1; - physical_of_found = bbio->stripes[i].physical; + physical_of_found = bioc->stripes[i].physical; } } if (found) { - struct btrfs_bio_stripe *tgtdev_stripe = - bbio->stripes + num_stripes; + struct btrfs_io_stripe *tgtdev_stripe = + bioc->stripes + num_stripes; tgtdev_stripe->physical = physical_of_found; tgtdev_stripe->length = - bbio->stripes[index_srcdev].length; + bioc->stripes[index_srcdev].length; tgtdev_stripe->dev = dev_replace->tgtdev; - bbio->tgtdev_map[index_srcdev] = num_stripes; + bioc->tgtdev_map[index_srcdev] = num_stripes; tgtdev_indexes++; num_stripes++; @@ -6273,8 +6270,8 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, *num_stripes_ret = num_stripes; *max_errors_ret = max_errors; - bbio->num_tgtdevs = tgtdev_indexes; - *bbio_ret = bbio; + bioc->num_tgtdevs = tgtdev_indexes; + *bioc_ret = bioc; } static bool need_full_stripe(enum btrfs_map_op op) @@ -6377,7 +6374,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em, static int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, - struct btrfs_bio **bbio_ret, + struct btrfs_io_context **bioc_ret, int mirror_num, int need_raid_map) { struct extent_map *em; @@ -6392,7 +6389,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int num_stripes; int max_errors = 0; int tgtdev_indexes = 0; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; int dev_replace_is_ongoing = 0; int num_alloc_stripes; @@ -6401,7 +6398,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, u64 raid56_full_stripe_start = (u64)-1; struct btrfs_io_geometry geom; - ASSERT(bbio_ret); + ASSERT(bioc_ret); ASSERT(op != BTRFS_MAP_DISCARD); em = btrfs_get_chunk_map(fs_info, logical, *length); @@ -6545,20 +6542,20 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, tgtdev_indexes = num_stripes; } - bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes); - if (!bbio) { + bioc = alloc_btrfs_io_context(num_alloc_stripes, tgtdev_indexes); + if (!bioc) { ret = -ENOMEM; goto out; } for (i = 0; i < num_stripes; i++) { - bbio->stripes[i].physical = map->stripes[stripe_index].physical + + bioc->stripes[i].physical = map->stripes[stripe_index].physical + stripe_offset + stripe_nr * map->stripe_len; - bbio->stripes[i].dev = map->stripes[stripe_index].dev; + bioc->stripes[i].dev = map->stripes[stripe_index].dev; stripe_index++; } - /* build raid_map */ + /* Build raid_map */ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map && (need_full_stripe(op) || mirror_num > 1)) { u64 tmp; @@ -6570,15 +6567,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, /* Fill in the logical address of each stripe */ tmp = stripe_nr * data_stripes; for (i = 0; i < data_stripes; i++) - bbio->raid_map[(i+rot) % num_stripes] = + bioc->raid_map[(i + rot) % num_stripes] = em->start + (tmp + i) * map->stripe_len; - bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE; + bioc->raid_map[(i + rot) % map->num_stripes] = RAID5_P_STRIPE; if (map->type & BTRFS_BLOCK_GROUP_RAID6) - bbio->raid_map[(i+rot+1) % num_stripes] = + bioc->raid_map[(i + rot + 1) % num_stripes] = RAID6_Q_STRIPE; - sort_parity_stripes(bbio, num_stripes); + sort_parity_stripes(bioc, num_stripes); } if (need_full_stripe(op)) @@ -6586,15 +6583,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL && need_full_stripe(op)) { - handle_ops_on_dev_replace(op, &bbio, dev_replace, logical, + handle_ops_on_dev_replace(op, &bioc, dev_replace, logical, &num_stripes, &max_errors); } - *bbio_ret = bbio; - bbio->map_type = map->type; - bbio->num_stripes = num_stripes; - bbio->max_errors = max_errors; - bbio->mirror_num = mirror_num; + *bioc_ret = bioc; + bioc->map_type = map->type; + bioc->num_stripes = num_stripes; + bioc->max_errors = max_errors; + bioc->mirror_num = mirror_num; /* * this is the case that REQ_READ && dev_replace_is_ongoing && @@ -6603,9 +6600,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, */ if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) { WARN_ON(num_stripes > 1); - bbio->stripes[0].dev = dev_replace->tgtdev; - bbio->stripes[0].physical = physical_to_patch_in_first_stripe; - bbio->mirror_num = map->num_stripes + 1; + bioc->stripes[0].dev = dev_replace->tgtdev; + bioc->stripes[0].physical = physical_to_patch_in_first_stripe; + bioc->mirror_num = map->num_stripes + 1; } out: if (dev_replace_is_ongoing) { @@ -6619,40 +6616,40 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, - struct btrfs_bio **bbio_ret, int mirror_num) + struct btrfs_io_context **bioc_ret, int mirror_num) { if (op == BTRFS_MAP_DISCARD) return __btrfs_map_block_for_discard(fs_info, logical, - length, bbio_ret); + length, bioc_ret); - return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, + return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, mirror_num, 0); } /* For Scrub/replace */ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, - struct btrfs_bio **bbio_ret) + struct btrfs_io_context **bioc_ret) { - return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1); + return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, 0, 1); } -static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio) +static inline void btrfs_end_bioc(struct btrfs_io_context *bioc, struct bio *bio) { - bio->bi_private = bbio->private; - bio->bi_end_io = bbio->end_io; + bio->bi_private = bioc->private; + bio->bi_end_io = bioc->end_io; bio_endio(bio); - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); } static void btrfs_end_bio(struct bio *bio) { - struct btrfs_bio *bbio = bio->bi_private; + struct btrfs_io_context *bioc = bio->bi_private; int is_orig_bio = 0; if (bio->bi_status) { - atomic_inc(&bbio->error); + atomic_inc(&bioc->error); if (bio->bi_status == BLK_STS_IOERR || bio->bi_status == BLK_STS_TARGET) { struct btrfs_device *dev = btrfs_io_bio(bio)->device; @@ -6670,22 +6667,22 @@ static void btrfs_end_bio(struct bio *bio) } } - if (bio == bbio->orig_bio) + if (bio == bioc->orig_bio) is_orig_bio = 1; - btrfs_bio_counter_dec(bbio->fs_info); + btrfs_bio_counter_dec(bioc->fs_info); - if (atomic_dec_and_test(&bbio->stripes_pending)) { + if (atomic_dec_and_test(&bioc->stripes_pending)) { if (!is_orig_bio) { bio_put(bio); - bio = bbio->orig_bio; + bio = bioc->orig_bio; } - btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; + btrfs_io_bio(bio)->mirror_num = bioc->mirror_num; /* only send an error to the higher layers if it is * beyond the tolerance of the btrfs bio */ - if (atomic_read(&bbio->error) > bbio->max_errors) { + if (atomic_read(&bioc->error) > bioc->max_errors) { bio->bi_status = BLK_STS_IOERR; } else { /* @@ -6695,18 +6692,18 @@ static void btrfs_end_bio(struct bio *bio) bio->bi_status = BLK_STS_OK; } - btrfs_end_bbio(bbio, bio); + btrfs_end_bioc(bioc, bio); } else if (!is_orig_bio) { bio_put(bio); } } -static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio, +static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio, u64 physical, struct btrfs_device *dev) { - struct btrfs_fs_info *fs_info = bbio->fs_info; + struct btrfs_fs_info *fs_info = bioc->fs_info; - bio->bi_private = bbio; + bio->bi_private = bioc; btrfs_io_bio(bio)->device = dev; bio->bi_end_io = btrfs_end_bio; bio->bi_iter.bi_sector = physical >> 9; @@ -6736,20 +6733,20 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio, btrfsic_submit_bio(bio); } -static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) +static void bioc_error(struct btrfs_io_context *bioc, struct bio *bio, u64 logical) { - atomic_inc(&bbio->error); - if (atomic_dec_and_test(&bbio->stripes_pending)) { + atomic_inc(&bioc->error); + if (atomic_dec_and_test(&bioc->stripes_pending)) { /* Should be the original bio. */ - WARN_ON(bio != bbio->orig_bio); + WARN_ON(bio != bioc->orig_bio); - btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; + btrfs_io_bio(bio)->mirror_num = bioc->mirror_num; bio->bi_iter.bi_sector = logical >> 9; - if (atomic_read(&bbio->error) > bbio->max_errors) + if (atomic_read(&bioc->error) > bioc->max_errors) bio->bi_status = BLK_STS_IOERR; else bio->bi_status = BLK_STS_OK; - btrfs_end_bbio(bbio, bio); + btrfs_end_bioc(bioc, bio); } } @@ -6764,35 +6761,35 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int ret; int dev_nr; int total_devs; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; length = bio->bi_iter.bi_size; map_length = length; btrfs_bio_counter_inc_blocked(fs_info); ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, - &map_length, &bbio, mirror_num, 1); + &map_length, &bioc, mirror_num, 1); if (ret) { btrfs_bio_counter_dec(fs_info); return errno_to_blk_status(ret); } - total_devs = bbio->num_stripes; - bbio->orig_bio = first_bio; - bbio->private = first_bio->bi_private; - bbio->end_io = first_bio->bi_end_io; - bbio->fs_info = fs_info; - atomic_set(&bbio->stripes_pending, bbio->num_stripes); + total_devs = bioc->num_stripes; + bioc->orig_bio = first_bio; + bioc->private = first_bio->bi_private; + bioc->end_io = first_bio->bi_end_io; + bioc->fs_info = fs_info; + atomic_set(&bioc->stripes_pending, bioc->num_stripes); - if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) && + if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) && ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) { /* In this case, map_length has been set to the length of a single stripe; not the whole write */ if (btrfs_op(bio) == BTRFS_MAP_WRITE) { - ret = raid56_parity_write(fs_info, bio, bbio, + ret = raid56_parity_write(fs_info, bio, bioc, map_length); } else { - ret = raid56_parity_recover(fs_info, bio, bbio, + ret = raid56_parity_recover(fs_info, bio, bioc, map_length, mirror_num, 1); } @@ -6808,12 +6805,12 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, } for (dev_nr = 0; dev_nr < total_devs; dev_nr++) { - dev = bbio->stripes[dev_nr].dev; + dev = bioc->stripes[dev_nr].dev; if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) || (btrfs_op(first_bio) == BTRFS_MAP_WRITE && !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) { - bbio_error(bbio, first_bio, logical); + bioc_error(bioc, first_bio, logical); continue; } @@ -6822,7 +6819,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, else bio = first_bio; - submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical, dev); + submit_stripe_bio(bioc, bio, bioc->stripes[dev_nr].physical, dev); } btrfs_bio_counter_dec(fs_info); return BLK_STS_OK; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 30288b728bbbdc..dfd7457709b325 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -306,11 +306,11 @@ struct btrfs_fs_devices { /* * we need the mirror number and stripe index to be passed around * the call chain while we are processing end_io (especially errors). - * Really, what we need is a btrfs_bio structure that has this info + * Really, what we need is a btrfs_io_context structure that has this info * and is properly sized with its stripe array, but we're not there * quite yet. We have our own btrfs bioset, and all of the bios * we allocate are actually btrfs_io_bios. We'll cram as much of - * struct btrfs_bio as we can into this over time. + * struct btrfs_io_context as we can into this over time. */ struct btrfs_io_bio { unsigned int mirror_num; @@ -339,13 +339,29 @@ static inline void btrfs_io_bio_free_csum(struct btrfs_io_bio *io_bio) } } -struct btrfs_bio_stripe { +struct btrfs_io_stripe { struct btrfs_device *dev; u64 physical; u64 length; /* only used for discard mappings */ }; -struct btrfs_bio { +/* + * Context for IO subsmission for device stripe. + * + * - Track the unfinished mirrors for mirror based profiles + * Mirror based profiles are SINGLE/DUP/RAID1/RAID10. + * + * - Contain the logical -> physical mapping info + * Used by submit_stripe_bio() for mapping logical bio + * into physical device address. + * + * - Contain device replace info + * Used by handle_ops_on_dev_replace() to copy logical bios + * into the new device. + * + * - Contain RAID56 full stripe logical bytenrs + */ +struct btrfs_io_context { refcount_t refs; atomic_t stripes_pending; struct btrfs_fs_info *fs_info; @@ -365,7 +381,7 @@ struct btrfs_bio { * so raid_map[0] is the start of our full stripe */ u64 *raid_map; - struct btrfs_bio_stripe stripes[]; + struct btrfs_io_stripe stripes[]; }; struct btrfs_device_info { @@ -400,11 +416,11 @@ struct map_lookup { int num_stripes; int sub_stripes; int verified_stripes; /* For mount time dev extent verification */ - struct btrfs_bio_stripe stripes[]; + struct btrfs_io_stripe stripes[]; }; #define map_lookup_size(n) (sizeof(struct map_lookup) + \ - (sizeof(struct btrfs_bio_stripe) * (n))) + (sizeof(struct btrfs_io_stripe) * (n))) struct btrfs_balance_args; struct btrfs_balance_progress; @@ -457,14 +473,14 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio) } } -void btrfs_get_bbio(struct btrfs_bio *bbio); -void btrfs_put_bbio(struct btrfs_bio *bbio); +void btrfs_get_bioc(struct btrfs_io_context *bioc); +void btrfs_put_bioc(struct btrfs_io_context *bioc); int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, - struct btrfs_bio **bbio_ret, int mirror_num); + struct btrfs_io_context **bioc_ret, int mirror_num); int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, - struct btrfs_bio **bbio_ret); + struct btrfs_io_context **bioc_ret); int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *map, enum btrfs_map_op op, u64 logical, struct btrfs_io_geometry *io_geom); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 3bc2f92cd197ad..ce4eeffc4f12f0 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1502,27 +1502,27 @@ int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 len static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical, struct blk_zone *zone) { - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; u64 mapped_length = PAGE_SIZE; unsigned int nofs_flag; int nmirrors; int i, ret; ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, - &mapped_length, &bbio); - if (ret || !bbio || mapped_length < PAGE_SIZE) { - btrfs_put_bbio(bbio); + &mapped_length, &bioc); + if (ret || !bioc || mapped_length < PAGE_SIZE) { + btrfs_put_bioc(bioc); return -EIO; } - if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) + if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) return -EINVAL; nofs_flag = memalloc_nofs_save(); - nmirrors = (int)bbio->num_stripes; + nmirrors = (int)bioc->num_stripes; for (i = 0; i < nmirrors; i++) { - u64 physical = bbio->stripes[i].physical; - struct btrfs_device *dev = bbio->stripes[i].dev; + u64 physical = bioc->stripes[i].physical; + struct btrfs_device *dev = bioc->stripes[i].dev; /* Missing device */ if (!dev->bdev) From d300ced1288937d727cad69b3374f489f3e24795 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jun 2022 18:03:19 +0200 Subject: [PATCH 0605/1056] btrfs: zoned: fix a leaked bioc in read_zone_info [ Upstream commit 2963457829decf0c824a443238d251151ed18ff5 ] The bioc would leak on the normal completion path and also on the RAID56 check (but that one won't happen in practice due to the invalid combination with zoned mode). Fixes: 7db1c5d14dcd ("btrfs: zoned: support dev-replace in zoned filesystems") CC: stable@vger.kernel.org # 5.16+ Reviewed-by: Anand Jain Signed-off-by: Christoph Hellwig [ update changelog ] Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/zoned.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index ce4eeffc4f12f0..574769f921a22b 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1511,12 +1511,14 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical, ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, &mapped_length, &bioc); if (ret || !bioc || mapped_length < PAGE_SIZE) { - btrfs_put_bioc(bioc); - return -EIO; + ret = -EIO; + goto out_put_bioc; } - if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) - return -EINVAL; + if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) { + ret = -EINVAL; + goto out_put_bioc; + } nofs_flag = memalloc_nofs_save(); nmirrors = (int)bioc->num_stripes; @@ -1535,7 +1537,8 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical, break; } memalloc_nofs_restore(nofs_flag); - +out_put_bioc: + btrfs_put_bioc(bioc); return ret; } From 4e69750549b8cf567e29d2b18979218975469265 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 2 Jun 2022 10:07:38 +0900 Subject: [PATCH 0606/1056] ksmbd: use SOCK_NONBLOCK type for kernel_accept() [ Upstream commit fe0fde09e1cb83effcf8fafa372533f438d93a1a ] I found that normally it is O_NONBLOCK but there are different value for some arch. /include/linux/net.h: #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif /arch/alpha/include/asm/socket.h: #define SOCK_NONBLOCK 0x40000000 Use SOCK_NONBLOCK instead of O_NONBLOCK for kernel_accept(). Suggested-by: David Howells Signed-off-by: Namjae Jeon Reviewed-by: Hyunchul Lee Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/ksmbd/transport_tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 82a1429bbe127e..755329c295cabd 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -230,7 +230,7 @@ static int ksmbd_kthread_fn(void *p) break; } ret = kernel_accept(iface->ksmbd_socket, &client_sk, - O_NONBLOCK); + SOCK_NONBLOCK); mutex_unlock(&iface->sock_release_lock); if (ret) { if (ret == -EAGAIN) From 10f2cd373e65bcd3be8f3cdc71c330c25763dfd8 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 23 Jun 2022 13:25:09 -0500 Subject: [PATCH 0607/1056] powerpc/xive/spapr: correct bitmap allocation size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 19fc5bb93c6bbdce8292b4d7eed04e2fa118d2fe ] kasan detects access beyond the end of the xibm->bitmap allocation: BUG: KASAN: slab-out-of-bounds in _find_first_zero_bit+0x40/0x140 Read of size 8 at addr c00000001d1d0118 by task swapper/0/1 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-rc2-00001-g90df023b36dd #28 Call Trace: [c00000001d98f770] [c0000000012baab8] dump_stack_lvl+0xac/0x108 (unreliable) [c00000001d98f7b0] [c00000000068faac] print_report+0x37c/0x710 [c00000001d98f880] [c0000000006902c0] kasan_report+0x110/0x354 [c00000001d98f950] [c000000000692324] __asan_load8+0xa4/0xe0 [c00000001d98f970] [c0000000011c6ed0] _find_first_zero_bit+0x40/0x140 [c00000001d98f9b0] [c0000000000dbfbc] xive_spapr_get_ipi+0xcc/0x260 [c00000001d98fa70] [c0000000000d6d28] xive_setup_cpu_ipi+0x1e8/0x450 [c00000001d98fb30] [c000000004032a20] pSeries_smp_probe+0x5c/0x118 [c00000001d98fb60] [c000000004018b44] smp_prepare_cpus+0x944/0x9ac [c00000001d98fc90] [c000000004009f9c] kernel_init_freeable+0x2d4/0x640 [c00000001d98fd90] [c0000000000131e8] kernel_init+0x28/0x1d0 [c00000001d98fe10] [c00000000000cd54] ret_from_kernel_thread+0x5c/0x64 Allocated by task 0: kasan_save_stack+0x34/0x70 __kasan_kmalloc+0xb4/0xf0 __kmalloc+0x268/0x540 xive_spapr_init+0x4d0/0x77c pseries_init_irq+0x40/0x27c init_IRQ+0x44/0x84 start_kernel+0x2a4/0x538 start_here_common+0x1c/0x20 The buggy address belongs to the object at c00000001d1d0118 which belongs to the cache kmalloc-8 of size 8 The buggy address is located 0 bytes inside of 8-byte region [c00000001d1d0118, c00000001d1d0120) The buggy address belongs to the physical page: page:c00c000000074740 refcount:1 mapcount:0 mapping:0000000000000000 index:0xc00000001d1d0558 pfn:0x1d1d flags: 0x7ffff000000200(slab|node=0|zone=0|lastcpupid=0x7ffff) raw: 007ffff000000200 c00000001d0003c8 c00000001d0003c8 c00000001d010480 raw: c00000001d1d0558 0000000001e1000a 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: c00000001d1d0000: fc 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc c00000001d1d0080: fc fc 00 fc fc fc fc fc fc fc fc fc fc fc fc fc >c00000001d1d0100: fc fc fc 02 fc fc fc fc fc fc fc fc fc fc fc fc ^ c00000001d1d0180: fc fc fc fc 04 fc fc fc fc fc fc fc fc fc fc fc c00000001d1d0200: fc fc fc fc fc 04 fc fc fc fc fc fc fc fc fc fc This happens because the allocation uses the wrong unit (bits) when it should pass (BITS_TO_LONGS(count) * sizeof(long)) or equivalent. With small numbers of bits, the allocated object can be smaller than sizeof(long), which results in invalid accesses. Use bitmap_zalloc() to allocate and initialize the irq bitmap, paired with bitmap_free() for consistency. Signed-off-by: Nathan Lynch Reviewed-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220623182509.3985625-1-nathanl@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/sysdev/xive/spapr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index a82f32fbe77279..583b2c6df3909b 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -55,7 +56,7 @@ static int xive_irq_bitmap_add(int base, int count) spin_lock_init(&xibm->lock); xibm->base = base; xibm->count = count; - xibm->bitmap = kzalloc(xibm->count, GFP_KERNEL); + xibm->bitmap = bitmap_zalloc(xibm->count, GFP_KERNEL); if (!xibm->bitmap) { kfree(xibm); return -ENOMEM; @@ -73,7 +74,7 @@ static void xive_irq_bitmap_remove_all(void) list_for_each_entry_safe(xibm, tmp, &xive_irq_bitmaps, list) { list_del(&xibm->list); - kfree(xibm->bitmap); + bitmap_free(xibm->bitmap); kfree(xibm); } } From b34dbeb2b0ece777a2256e820aaa809aff56abfd Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 13 Jun 2022 10:59:58 +0300 Subject: [PATCH 0608/1056] vdpa/mlx5: Initialize CVQ vringh only once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ace9252446ec615cd79a5f77d90edb25c0b9d024 ] Currently, CVQ vringh is initialized inside setup_virtqueues() which is called every time a memory update is done. This is undesirable since it resets all the context of the vring, including the available and used indices. Move the initialization to mlx5_vdpa_set_status() when VIRTIO_CONFIG_S_DRIVER_OK is set. Signed-off-by: Eli Cohen Message-Id: <20220613075958.511064-2-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Acked-by: Eugenio Pérez Signed-off-by: Sasha Levin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 467a349dc26c0d..e748c00789f041 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1898,7 +1898,6 @@ static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - struct mlx5_control_vq *cvq = &mvdev->cvq; int err; int i; @@ -1908,16 +1907,6 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) goto err_vq; } - if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { - err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, - MLX5_CVQ_MAX_ENT, false, - (struct vring_desc *)(uintptr_t)cvq->desc_addr, - (struct vring_avail *)(uintptr_t)cvq->driver_addr, - (struct vring_used *)(uintptr_t)cvq->device_addr); - if (err) - goto err_vq; - } - return 0; err_vq: @@ -2184,6 +2173,21 @@ static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) ndev->mvdev.cvq.ready = false; } +static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_control_vq *cvq = &mvdev->cvq; + int err = 0; + + if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, + MLX5_CVQ_MAX_ENT, false, + (struct vring_desc *)(uintptr_t)cvq->desc_addr, + (struct vring_avail *)(uintptr_t)cvq->driver_addr, + (struct vring_used *)(uintptr_t)cvq->device_addr); + + return err; +} + static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); @@ -2194,6 +2198,11 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { if (status & VIRTIO_CONFIG_S_DRIVER_OK) { + err = setup_cvq_vring(mvdev); + if (err) { + mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n"); + goto err_setup; + } err = setup_driver(mvdev); if (err) { mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); From 65d7a723fdda415eec901b7d7805e68f23359017 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 13 Jun 2022 22:52:23 +0300 Subject: [PATCH 0609/1056] vduse: Tie vduse mgmtdev and its device [ Upstream commit 0e0348ac3f0a6e6606f1aa5acb1803ada913aa3d ] vduse devices are not backed by any real devices such as PCI. Hence it doesn't have any parent device linked to it. Kernel driver model in [1] suggests to avoid an empty device release callback. Hence tie the mgmtdevice object's life cycle to an allocate dummy struct device instead of static one. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/core-api/kobject.rst?h=v5.18-rc7#n284 Signed-off-by: Parav Pandit Message-Id: <20220613195223.473966-1-parav@nvidia.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xie Yongji Acked-by: Jason Wang Signed-off-by: Sasha Levin --- drivers/vdpa/vdpa_user/vduse_dev.c | 60 ++++++++++++++++++------------ 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 9270398caf1559..73e67fa88972a5 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1466,16 +1466,12 @@ static char *vduse_devnode(struct device *dev, umode_t *mode) return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev)); } -static void vduse_mgmtdev_release(struct device *dev) -{ -} - -static struct device vduse_mgmtdev = { - .init_name = "vduse", - .release = vduse_mgmtdev_release, +struct vduse_mgmt_dev { + struct vdpa_mgmt_dev mgmt_dev; + struct device dev; }; -static struct vdpa_mgmt_dev mgmt_dev; +static struct vduse_mgmt_dev *vduse_mgmt; static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) { @@ -1500,7 +1496,7 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) } set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops); vdev->vdpa.dma_dev = &vdev->vdpa.dev; - vdev->vdpa.mdev = &mgmt_dev; + vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev; return 0; } @@ -1545,34 +1541,52 @@ static struct virtio_device_id id_table[] = { { 0 }, }; -static struct vdpa_mgmt_dev mgmt_dev = { - .device = &vduse_mgmtdev, - .id_table = id_table, - .ops = &vdpa_dev_mgmtdev_ops, -}; +static void vduse_mgmtdev_release(struct device *dev) +{ + struct vduse_mgmt_dev *mgmt_dev; + + mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev); + kfree(mgmt_dev); +} static int vduse_mgmtdev_init(void) { int ret; - ret = device_register(&vduse_mgmtdev); - if (ret) + vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL); + if (!vduse_mgmt) + return -ENOMEM; + + ret = dev_set_name(&vduse_mgmt->dev, "vduse"); + if (ret) { + kfree(vduse_mgmt); return ret; + } - ret = vdpa_mgmtdev_register(&mgmt_dev); + vduse_mgmt->dev.release = vduse_mgmtdev_release; + + ret = device_register(&vduse_mgmt->dev); if (ret) - goto err; + goto dev_reg_err; - return 0; -err: - device_unregister(&vduse_mgmtdev); + vduse_mgmt->mgmt_dev.id_table = id_table; + vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops; + vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev; + ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev); + if (ret) + device_unregister(&vduse_mgmt->dev); + + return ret; + +dev_reg_err: + put_device(&vduse_mgmt->dev); return ret; } static void vduse_mgmtdev_exit(void) { - vdpa_mgmtdev_unregister(&mgmt_dev); - device_unregister(&vduse_mgmtdev); + vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev); + device_unregister(&vduse_mgmt->dev); } static int vduse_init(void) From 26d824d1316fa57b54711b1f1787aded425cfa52 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 21 Jun 2022 13:06:20 +0200 Subject: [PATCH 0610/1056] virtio_mmio: Add missing PM calls to freeze/restore [ Upstream commit ed7ac37fde33ccd84e4bd2b9363c191f925364c7 ] Most virtio drivers provide freeze/restore callbacks to finish up device usage before suspend and to reinitialize the virtio device after resume. However, these callbacks are currently only called when using virtio_pci. virtio_mmio does not have any PM ops defined. This causes problems for example after suspend to disk (hibernation), since the virtio devices might lose their state after the VMM is restarted. Calling virtio_device_freeze()/restore() ensures that the virtio devices are re-initialized correctly. Fix this by implementing the dev_pm_ops for virtio_mmio, similar to virtio_pci_common. Signed-off-by: Stephan Gerhold Message-Id: <20220621110621.3638025-2-stephan.gerhold@kernkonzept.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/virtio/virtio_mmio.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 1dd396d4bebb26..7522832529dd60 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -543,6 +544,25 @@ static const struct virtio_config_ops virtio_mmio_config_ops = { .get_shm_region = vm_get_shm_region, }; +#ifdef CONFIG_PM_SLEEP +static int virtio_mmio_freeze(struct device *dev) +{ + struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + + return virtio_device_freeze(&vm_dev->vdev); +} + +static int virtio_mmio_restore(struct device *dev) +{ + struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + + return virtio_device_restore(&vm_dev->vdev); +} + +static const struct dev_pm_ops virtio_mmio_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(virtio_mmio_freeze, virtio_mmio_restore) +}; +#endif static void virtio_mmio_release_dev(struct device *_d) { @@ -786,6 +806,9 @@ static struct platform_driver virtio_mmio_driver = { .name = "virtio-mmio", .of_match_table = virtio_mmio_match, .acpi_match_table = ACPI_PTR(virtio_mmio_acpi_match), +#ifdef CONFIG_PM_SLEEP + .pm = &virtio_mmio_pm_ops, +#endif }, }; From bf53079245634c3eccdab47befb8ffff77c6141a Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 21 Jun 2022 13:06:21 +0200 Subject: [PATCH 0611/1056] virtio_mmio: Restore guest page size on resume [ Upstream commit e0c2ce8217955537dd5434baeba061f209797119 ] Virtio devices might lose their state when the VMM is restarted after a suspend to disk (hibernation) cycle. This means that the guest page size register must be restored for the virtio_mmio legacy interface, since otherwise the virtio queues are not functional. This is particularly problematic for QEMU that currently still defaults to using the legacy interface for virtio_mmio. Write the guest page size register again in virtio_mmio_restore() to make legacy virtio_mmio devices work correctly after hibernation. Signed-off-by: Stephan Gerhold Message-Id: <20220621110621.3638025-3-stephan.gerhold@kernkonzept.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/virtio/virtio_mmio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 7522832529dd60..fe696aafaed86e 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -556,6 +556,9 @@ static int virtio_mmio_restore(struct device *dev) { struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + if (vm_dev->version == 1) + writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE); + return virtio_device_restore(&vm_dev->vdev); } From 03a400a64050744398cc3645ca51a7cb56d13010 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 21 Jun 2022 18:26:03 +0200 Subject: [PATCH 0612/1056] netfilter: br_netfilter: do not skip all hooks with 0 priority [ Upstream commit c2577862eeb0be94f151f2f1fff662b028061b00 ] When br_netfilter module is loaded, skbs may be diverted to the ipv4/ipv6 hooks, just like as if we were routing. Unfortunately, bridge filter hooks with priority 0 may be skipped in this case. Example: 1. an nftables bridge ruleset is loaded, with a prerouting hook that has priority 0. 2. interface is added to the bridge. 3. no tcp packet is ever seen by the bridge prerouting hook. 4. flush the ruleset 5. load the bridge ruleset again. 6. tcp packets are processed as expected. After 1) the only registered hook is the bridge prerouting hook, but its not called yet because the bridge hasn't been brought up yet. After 2), hook order is: 0 br_nf_pre_routing // br_netfilter internal hook 0 chain bridge f prerouting // nftables bridge ruleset The packet is diverted to br_nf_pre_routing. If call-iptables is off, the nftables bridge ruleset is called as expected. But if its enabled, br_nf_hook_thresh() will skip it because it assumes that all 0-priority hooks had been called previously in bridge context. To avoid this, check for the br_nf_pre_routing hook itself, we need to resume directly after it, even if this hook has a priority of 0. Unfortunately, this still results in different packet flow. With this fix, the eval order after in 3) is: 1. br_nf_pre_routing 2. ip(6)tables (if enabled) 3. nftables bridge but after 5 its the much saner: 1. nftables bridge 2. br_nf_pre_routing 3. ip(6)tables (if enabled) Unfortunately I don't see a solution here: It would be possible to move br_nf_pre_routing to a higher priority so that it will be called later in the pipeline, but this also impacts ebtables evaluation order, and would still result in this very ordering problem for all nftables-bridge hooks with the same priority as the br_nf_pre_routing one. Searching back through the git history I don't think this has ever behaved in any other way, hence, no fixes-tag. Reported-by: Radim Hrazdil Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/bridge/br_netfilter_hooks.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 68c0d0f928908e..10a2c7bca7199d 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1012,9 +1012,24 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, return okfn(net, sk, skb); ops = nf_hook_entries_get_hook_ops(e); - for (i = 0; i < e->num_hook_entries && - ops[i]->priority <= NF_BR_PRI_BRNF; i++) - ; + for (i = 0; i < e->num_hook_entries; i++) { + /* These hooks have already been called */ + if (ops[i]->priority < NF_BR_PRI_BRNF) + continue; + + /* These hooks have not been called yet, run them. */ + if (ops[i]->priority > NF_BR_PRI_BRNF) + break; + + /* take a closer look at NF_BR_PRI_BRNF. */ + if (ops[i]->hook == br_nf_pre_routing) { + /* This hook diverted the skb to this function, + * hooks after this have not been run yet. + */ + i++; + break; + } + } nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); From 06f818de1621ea4da689fd45f1eb54ff7617342b Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 23 Jun 2022 20:41:59 +0800 Subject: [PATCH 0613/1056] scsi: hisi_sas: Limit max hw sectors for v3 HW [ Upstream commit fce54ed027577517df1e74b7d54dc2b1bd536887 ] If the controller is behind an IOMMU then the IOMMU IOVA caching range can affect performance, as discussed in [0]. Limit the max HW sectors to not exceed this limit. We need to hardcode the value until a proper DMA mapping API is available. [0] https://lore.kernel.org/linux-iommu/20210129092120.1482-1-thunder.leizhen@huawei.com/ Link: https://lore.kernel.org/r/1655988119-223714-1-git-send-email-john.garry@huawei.com Signed-off-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 1f5e0688c0c84d..15c7451fb30f97 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2758,6 +2758,7 @@ static int slave_configure_v3_hw(struct scsi_device *sdev) struct hisi_hba *hisi_hba = shost_priv(shost); struct device *dev = hisi_hba->dev; int ret = sas_slave_configure(sdev); + unsigned int max_sectors; if (ret) return ret; @@ -2775,6 +2776,12 @@ static int slave_configure_v3_hw(struct scsi_device *sdev) } } + /* Set according to IOMMU IOVA caching limit */ + max_sectors = min_t(size_t, queue_max_hw_sectors(sdev->request_queue), + (PAGE_SIZE * 32) >> SECTOR_SHIFT); + + blk_queue_max_hw_sectors(sdev->request_queue, max_sectors); + return 0; } From 57289b6601fe78c09921599b042a0b430fb420ec Mon Sep 17 00:00:00 2001 From: Liang He Date: Sat, 18 Jun 2022 10:25:45 +0800 Subject: [PATCH 0614/1056] cpufreq: pmac32-cpufreq: Fix refcount leak bug [ Upstream commit ccd7567d4b6cf187fdfa55f003a9e461ee629e36 ] In pmac_cpufreq_init_MacRISC3(), we need to add corresponding of_node_put() for the three node pointers whose refcount have been incremented by of_find_node_by_name(). Signed-off-by: Liang He Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/pmac32-cpufreq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 4f20c6a9108df5..8e41fe9ee870df 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -470,6 +470,10 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) if (slew_done_gpio_np) slew_done_gpio = read_gpio(slew_done_gpio_np); + of_node_put(volt_gpio_np); + of_node_put(freq_gpio_np); + of_node_put(slew_done_gpio_np); + /* If we use the frequency GPIOs, calculate the min/max speeds based * on the bus frequencies */ From f57c76104c74fb16699417dc8bd042fc122c04fa Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 28 Jun 2022 20:37:26 +0800 Subject: [PATCH 0615/1056] platform/x86: hp-wmi: Ignore Sanitization Mode event [ Upstream commit 9ab762a84b8094540c18a170e5ddd6488632c456 ] After system resume the hp-wmi driver may complain: [ 702.620180] hp_wmi: Unknown event_id - 23 - 0x0 According to HP it means 'Sanitization Mode' and it's harmless to just ignore the event. Cc: Jorge Lopez Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20220628123726.250062-1-kai.heng.feng@canonical.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/hp-wmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 027a1467d009fd..1cd168e3281096 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -63,6 +63,7 @@ enum hp_wmi_event_ids { HPWMI_BACKLIT_KB_BRIGHTNESS = 0x0D, HPWMI_PEAKSHIFT_PERIOD = 0x0F, HPWMI_BATTERY_CHARGE_PERIOD = 0x10, + HPWMI_SANITIZATION_MODE = 0x17, }; struct bios_args { @@ -638,6 +639,8 @@ static void hp_wmi_notify(u32 value, void *context) break; case HPWMI_BATTERY_CHARGE_PERIOD: break; + case HPWMI_SANITIZATION_MODE: + break; default: pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data); break; From 2ef4c6d66f7dc8f18047707def98b38e16ed628e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 7 Jun 2022 20:23:34 +0200 Subject: [PATCH 0616/1056] firmware: sysfb: Make sysfb_create_simplefb() return a pdev pointer [ Upstream commit 9e121040e54abef9ed5542e5fdfa87911cd96204 ] This function just returned 0 on success or an errno code on error, but it could be useful for sysfb_init() callers to have a pointer to the device. Signed-off-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220607182338.344270-2-javierm@redhat.com Signed-off-by: Sasha Levin --- drivers/firmware/sysfb.c | 4 ++-- drivers/firmware/sysfb_simplefb.c | 16 ++++++++-------- include/linux/sysfb.h | 10 +++++----- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 2bfbb05f7d8969..b032f40a92de02 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -46,8 +46,8 @@ static __init int sysfb_init(void) /* try to create a simple-framebuffer device */ compatible = sysfb_parse_mode(si, &mode); if (compatible) { - ret = sysfb_create_simplefb(si, &mode); - if (!ret) + pd = sysfb_create_simplefb(si, &mode); + if (!IS_ERR(pd)) return 0; } diff --git a/drivers/firmware/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c index 757cc8b9f3de91..eac51c2a27bade 100644 --- a/drivers/firmware/sysfb_simplefb.c +++ b/drivers/firmware/sysfb_simplefb.c @@ -57,8 +57,8 @@ __init bool sysfb_parse_mode(const struct screen_info *si, return false; } -__init int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) +__init struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode) { struct platform_device *pd; struct resource res; @@ -76,7 +76,7 @@ __init int sysfb_create_simplefb(const struct screen_info *si, base |= (u64)si->ext_lfb_base << 32; if (!base || (u64)(resource_size_t)base != base) { printk(KERN_DEBUG "sysfb: inaccessible VRAM base\n"); - return -EINVAL; + return ERR_PTR(-EINVAL); } /* @@ -93,7 +93,7 @@ __init int sysfb_create_simplefb(const struct screen_info *si, length = mode->height * mode->stride; if (length > size) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); - return -EINVAL; + return ERR_PTR(-EINVAL); } length = PAGE_ALIGN(length); @@ -104,11 +104,11 @@ __init int sysfb_create_simplefb(const struct screen_info *si, res.start = base; res.end = res.start + length - 1; if (res.end <= res.start) - return -EINVAL; + return ERR_PTR(-EINVAL); pd = platform_device_alloc("simple-framebuffer", 0); if (!pd) - return -ENOMEM; + return ERR_PTR(-ENOMEM); sysfb_apply_efi_quirks(pd); @@ -124,10 +124,10 @@ __init int sysfb_create_simplefb(const struct screen_info *si, if (ret) goto err_put_device; - return 0; + return pd; err_put_device: platform_device_put(pd); - return ret; + return ERR_PTR(ret); } diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index b0dcfa26d07bd0..708152e9037bf4 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -72,8 +72,8 @@ static inline void sysfb_apply_efi_quirks(struct platform_device *pd) bool sysfb_parse_mode(const struct screen_info *si, struct simplefb_platform_data *mode); -int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode); +struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode); #else /* CONFIG_SYSFB_SIMPLE */ @@ -83,10 +83,10 @@ static inline bool sysfb_parse_mode(const struct screen_info *si, return false; } -static inline int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) +static inline struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode) { - return -EINVAL; + return ERR_PTR(-EINVAL); } #endif /* CONFIG_SYSFB_SIMPLE */ From b952569e03168e23258fabf3faa4d6d2b8e08835 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 7 Jun 2022 20:23:35 +0200 Subject: [PATCH 0617/1056] firmware: sysfb: Add sysfb_disable() helper function [ Upstream commit bde376e9de3c0bc55eedc8956b0f114c05531595 ] This can be used by subsystems to unregister a platform device registered by sysfb and also to disable future platform device registration in sysfb. Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220607182338.344270-3-javierm@redhat.com Signed-off-by: Sasha Levin --- .../driver-api/firmware/other_interfaces.rst | 6 +++ drivers/firmware/sysfb.c | 54 ++++++++++++++++--- include/linux/sysfb.h | 12 +++++ 3 files changed, 66 insertions(+), 6 deletions(-) diff --git a/Documentation/driver-api/firmware/other_interfaces.rst b/Documentation/driver-api/firmware/other_interfaces.rst index b81794e0cfbb93..06ac89adaafba2 100644 --- a/Documentation/driver-api/firmware/other_interfaces.rst +++ b/Documentation/driver-api/firmware/other_interfaces.rst @@ -13,6 +13,12 @@ EDD Interfaces .. kernel-doc:: drivers/firmware/edd.c :internal: +Generic System Framebuffers Interface +------------------------------------- + +.. kernel-doc:: drivers/firmware/sysfb.c + :export: + Intel Stratix10 SoC Service Layer --------------------------------- Some features of the Intel Stratix10 SoC require a level of privilege diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index b032f40a92de02..1f276f108cc936 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -34,21 +34,59 @@ #include #include +static struct platform_device *pd; +static DEFINE_MUTEX(disable_lock); +static bool disabled; + +static bool sysfb_unregister(void) +{ + if (IS_ERR_OR_NULL(pd)) + return false; + + platform_device_unregister(pd); + pd = NULL; + + return true; +} + +/** + * sysfb_disable() - disable the Generic System Framebuffers support + * + * This disables the registration of system framebuffer devices that match the + * generic drivers that make use of the system framebuffer set up by firmware. + * + * It also unregisters a device if this was already registered by sysfb_init(). + * + * Context: The function can sleep. A @disable_lock mutex is acquired to serialize + * against sysfb_init(), that registers a system framebuffer device. + */ +void sysfb_disable(void) +{ + mutex_lock(&disable_lock); + sysfb_unregister(); + disabled = true; + mutex_unlock(&disable_lock); +} +EXPORT_SYMBOL_GPL(sysfb_disable); + static __init int sysfb_init(void) { struct screen_info *si = &screen_info; struct simplefb_platform_data mode; - struct platform_device *pd; const char *name; bool compatible; - int ret; + int ret = 0; + + mutex_lock(&disable_lock); + if (disabled) + goto unlock_mutex; /* try to create a simple-framebuffer device */ compatible = sysfb_parse_mode(si, &mode); if (compatible) { pd = sysfb_create_simplefb(si, &mode); if (!IS_ERR(pd)) - return 0; + goto unlock_mutex; } /* if the FB is incompatible, create a legacy framebuffer device */ @@ -60,8 +98,10 @@ static __init int sysfb_init(void) name = "platform-framebuffer"; pd = platform_device_alloc(name, 0); - if (!pd) - return -ENOMEM; + if (!pd) { + ret = -ENOMEM; + goto unlock_mutex; + } sysfb_apply_efi_quirks(pd); @@ -73,9 +113,11 @@ static __init int sysfb_init(void) if (ret) goto err; - return 0; + goto unlock_mutex; err: platform_device_put(pd); +unlock_mutex: + mutex_unlock(&disable_lock); return ret; } diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index 708152e9037bf4..8ba8b5be556751 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -55,6 +55,18 @@ struct efifb_dmi_info { int flags; }; +#ifdef CONFIG_SYSFB + +void sysfb_disable(void); + +#else /* CONFIG_SYSFB */ + +static inline void sysfb_disable(void) +{ +} + +#endif /* CONFIG_SYSFB */ + #ifdef CONFIG_EFI extern struct efifb_dmi_info efifb_dmi_list[]; From 13d28e0c79cbf69fc6f145767af66905586c1249 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 7 Jun 2022 20:23:36 +0200 Subject: [PATCH 0618/1056] fbdev: Disable sysfb device registration when removing conflicting FBs [ Upstream commit ee7a69aa38d87a3bbced7b8245c732c05ed0c6ec ] The platform devices registered by sysfb match with firmware-based DRM or fbdev drivers, that are used to have early graphics using a framebuffer provided by the system firmware. DRM or fbdev drivers later are probed and remove conflicting framebuffers, leading to these platform devices for generic drivers to be unregistered. But the current solution has a race, since the sysfb_init() function could be called after a DRM or fbdev driver is probed and request to unregister the devices for drivers with conflicting framebuffes. To prevent this, disable any future sysfb platform device registration by calling sysfb_disable(), if a driver requests to remove the conflicting framebuffers. Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220607182338.344270-4-javierm@redhat.com Signed-off-by: Sasha Levin --- drivers/video/fbdev/core/fbmem.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 6d7868cc1fca05..528c87ff14d802 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1786,6 +1787,17 @@ int remove_conflicting_framebuffers(struct apertures_struct *a, do_free = true; } + /* + * If a driver asked to unregister a platform device registered by + * sysfb, then can be assumed that this is a driver for a display + * that is set up by the system firmware and has a generic driver. + * + * Drivers for devices that don't have a generic driver will never + * ask for this, so let's assume that a real driver for the display + * was already probed and prevent sysfb to register devices later. + */ + sysfb_disable(); + mutex_lock(®istration_lock); do_remove_conflicting_framebuffers(a, name, primary); mutex_unlock(®istration_lock); From 833ecd0eae76eadf81d6d747bb5bc992d1151867 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Wed, 29 Jun 2022 14:34:18 +0800 Subject: [PATCH 0619/1056] net: tipc: fix possible refcount leak in tipc_sk_create() [ Upstream commit 00aff3590fc0a73bddd3b743863c14e76fd35c0c ] Free sk in case tipc_sk_insert() fails. Signed-off-by: Hangyu Hua Reviewed-by: Tung Nguyen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 17f8c523e33b04..43509c7e90fc28 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -502,6 +502,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sock_init_data(sock, sk); tipc_set_sk_state(sk, TIPC_OPEN); if (tipc_sk_insert(tsk)) { + sk_free(sk); pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; } From 8ab067462e09e0152c4ad2467c0c719d78f53583 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 27 Jun 2022 19:06:43 +0200 Subject: [PATCH 0620/1056] NFC: nxp-nci: don't print header length mismatch on i2c error [ Upstream commit 9577fc5fdc8b07b891709af6453545db405e24ad ] Don't print a misleading header length mismatch error if the i2c call returns an error. Instead just return the error code without any error message. Signed-off-by: Michael Walle Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/nfc/nxp-nci/i2c.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index e8f3b35afbee4e..ae2ba08d8ac3f3 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -122,7 +122,9 @@ static int nxp_nci_i2c_fw_read(struct nxp_nci_i2c_phy *phy, skb_put_data(*skb, &header, NXP_NCI_FW_HDR_LEN); r = i2c_master_recv(client, skb_put(*skb, frame_len), frame_len); - if (r != frame_len) { + if (r < 0) { + goto fw_read_exit_free_skb; + } else if (r != frame_len) { nfc_err(&client->dev, "Invalid frame length: %u (expected %zu)\n", r, frame_len); @@ -166,7 +168,9 @@ static int nxp_nci_i2c_nci_read(struct nxp_nci_i2c_phy *phy, return 0; r = i2c_master_recv(client, skb_put(*skb, header.plen), header.plen); - if (r != header.plen) { + if (r < 0) { + goto nci_read_exit_free_skb; + } else if (r != header.plen) { nfc_err(&client->dev, "Invalid frame payload length: %u (expected %u)\n", r, header.plen); From 1e4427aa2fc840a9549a9c751c436ef9ec7713b6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 26 Jun 2022 12:24:51 +0300 Subject: [PATCH 0621/1056] nvme-tcp: always fail a request when sending it failed [ Upstream commit 41d07df7de841bfbc32725ce21d933ad358f2844 ] queue stoppage and inflight requests cancellation is fully fenced from io_work and thus failing a request from this context. Hence we don't need to try to guess from the socket retcode if this failure is because the queue is about to be torn down or not. We are perfectly safe to just fail it, the request will not be cancelled later on. This solves possible very long shutdown delays when the users issues a 'nvme disconnect-all' Reported-by: Daniel Wagner Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/tcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 10882d3d554c2b..1821d38e620ecf 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1162,8 +1162,7 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue) } else if (ret < 0) { dev_err(queue->ctrl->ctrl.device, "failed to send request %d\n", ret); - if (ret != -EPIPE && ret != -ECONNRESET) - nvme_tcp_fail_request(queue->request); + nvme_tcp_fail_request(queue->request); nvme_tcp_done_send_req(queue); } return ret; From 7a2294c5f2e5636772afe6bd6c5b28218e0ea154 Mon Sep 17 00:00:00 2001 From: Ruozhu Li Date: Thu, 23 Jun 2022 14:45:39 +0800 Subject: [PATCH 0622/1056] nvme: fix regression when disconnect a recovering ctrl [ Upstream commit f7f70f4aa09dc43d7455c060143e86a017c30548 ] We encountered a problem that the disconnect command hangs. After analyzing the log and stack, we found that the triggering process is as follows: CPU0 CPU1 nvme_rdma_error_recovery_work nvme_rdma_teardown_io_queues nvme_do_delete_ctrl nvme_stop_queues nvme_remove_namespaces --clear ctrl->namespaces nvme_start_queues --no ns in ctrl->namespaces nvme_ns_remove return(because ctrl is deleting) blk_freeze_queue blk_mq_freeze_queue_wait --wait for ns to unquiesce to clean infligt IO, hang forever This problem was not found in older kernels because we will flush err work in nvme_stop_ctrl before nvme_remove_namespaces.It does not seem to be modified for functional reasons, the patch can be revert to solve the problem. Revert commit 794a4cb3d2f7 ("nvme: remove the .stop_ctrl callout") Signed-off-by: Ruozhu Li Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 2 ++ drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/rdma.c | 12 +++++++++--- drivers/nvme/host/tcp.c | 10 +++++++--- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 19054b791c6727..29b56ea01132f4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4385,6 +4385,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) nvme_stop_failfast_work(ctrl); flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fw_act_work); + if (ctrl->ops->stop_ctrl) + ctrl->ops->stop_ctrl(ctrl); } EXPORT_SYMBOL_GPL(nvme_stop_ctrl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 72bcd7e5716e53..75a7e7baa1fc63 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -495,6 +495,7 @@ struct nvme_ctrl_ops { void (*free_ctrl)(struct nvme_ctrl *ctrl); void (*submit_async_event)(struct nvme_ctrl *ctrl); void (*delete_ctrl)(struct nvme_ctrl *ctrl); + void (*stop_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); }; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d51f52e296f50e..2db9c166a1b76b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1049,6 +1049,14 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, } } +static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl) +{ + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); + + cancel_work_sync(&ctrl->err_work); + cancel_delayed_work_sync(&ctrl->reconnect_work); +} + static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); @@ -2230,9 +2238,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { - cancel_work_sync(&ctrl->err_work); - cancel_delayed_work_sync(&ctrl->reconnect_work); - nvme_rdma_teardown_io_queues(ctrl, shutdown); blk_mq_quiesce_queue(ctrl->ctrl.admin_q); if (shutdown) @@ -2282,6 +2287,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .submit_async_event = nvme_rdma_submit_async_event, .delete_ctrl = nvme_rdma_delete_ctrl, .get_address = nvmf_get_address, + .stop_ctrl = nvme_rdma_stop_ctrl, }; /* diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1821d38e620ecf..20138e132558c9 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2163,9 +2163,6 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) { - cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work); - cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); - nvme_tcp_teardown_io_queues(ctrl, shutdown); blk_mq_quiesce_queue(ctrl->admin_q); if (shutdown) @@ -2205,6 +2202,12 @@ static void nvme_reset_ctrl_work(struct work_struct *work) nvme_tcp_reconnect_or_remove(ctrl); } +static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl) +{ + cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work); + cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); +} + static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); @@ -2528,6 +2531,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = { .submit_async_event = nvme_tcp_submit_async_event, .delete_ctrl = nvme_tcp_delete_ctrl, .get_address = nvmf_get_address, + .stop_ctrl = nvme_tcp_stop_ctrl, }; static bool From 204543581a2f26bb3b997a304c0bd06926ba7f15 Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Wed, 29 Jun 2022 15:55:50 +0800 Subject: [PATCH 0623/1056] net: sfp: fix memory leak in sfp_probe() [ Upstream commit 0a18d802d65cf662644fd1d369c86d84a5630652 ] sfp_probe() allocates a memory chunk from sfp with sfp_alloc(). When devm_add_action() fails, sfp is not freed, which leads to a memory leak. We should use devm_add_action_or_reset() instead of devm_add_action(). Signed-off-by: Jianglei Nie Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20220629075550.2152003-1-niejianglei2021@163.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/sfp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 90dfefc1f5f8da..028a5df5c53851 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -2504,7 +2504,7 @@ static int sfp_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sfp); - err = devm_add_action(sfp->dev, sfp_cleanup, sfp); + err = devm_add_action_or_reset(sfp->dev, sfp_cleanup, sfp); if (err < 0) return err; From 0c6c7d57ed001fc6390f5ffb85477de7964c9e49 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 4 Jun 2022 11:52:46 +0100 Subject: [PATCH 0624/1056] ASoC: ops: Fix off by one in range control validation [ Upstream commit 5871321fb4558c55bf9567052b618ff0be6b975e ] We currently report that range controls accept a range of 0..(max-min) but accept writes in the range 0..(max-min+1). Remove that extra +1. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220604105246.4055214-1-broonie@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index f32ba64c5ddabe..e73360e9de8f90 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -526,7 +526,7 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, return -EINVAL; if (mc->platform_max && tmp > mc->platform_max) return -EINVAL; - if (tmp > mc->max - mc->min + 1) + if (tmp > mc->max - mc->min) return -EINVAL; if (invert) @@ -547,7 +547,7 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, return -EINVAL; if (mc->platform_max && tmp > mc->platform_max) return -EINVAL; - if (tmp > mc->max - mc->min + 1) + if (tmp > mc->max - mc->min) return -EINVAL; if (invert) From 3cb392b64304a05bf647e2e44efacd9a1f3c3c6a Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 21 Apr 2022 10:26:59 +0800 Subject: [PATCH 0625/1056] pinctrl: aspeed: Fix potential NULL dereference in aspeed_pinmux_set_mux() [ Upstream commit 84a85d3fef2e75b1fe9fc2af6f5267122555a1ed ] pdesc could be null but still dereference pdesc->name and it will lead to a null pointer access. So we move a null check before dereference. Signed-off-by: Haowen Bai Link: https://lore.kernel.org/r/1650508019-22554-1-git-send-email-baihaowen@meizu.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/aspeed/pinctrl-aspeed.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c index c94e24aadf922d..83d47ff1cea8f2 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c @@ -236,11 +236,11 @@ int aspeed_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int function, const struct aspeed_sig_expr **funcs; const struct aspeed_sig_expr ***prios; - pr_debug("Muxing pin %s for %s\n", pdesc->name, pfunc->name); - if (!pdesc) return -EINVAL; + pr_debug("Muxing pin %s for %s\n", pdesc->name, pfunc->name); + prios = pdesc->prios; if (!prios) From 4e81b38d942666558fb5bdbfca3856abc60a152d Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 6 Jun 2022 15:37:46 -0500 Subject: [PATCH 0626/1056] ASoC: Realtek/Maxim SoundWire codecs: disable pm_runtime on remove [ Upstream commit ac63716da3070f8cb6baaba3a058a0c7f22aeb5b ] When binding/unbinding codec drivers, the following warnings are thrown: [ 107.266879] rt715-sdca sdw:3:025d:0714:01: Unbalanced pm_runtime_enable! [ 306.879700] rt711-sdca sdw:0:025d:0711:01: Unbalanced pm_runtime_enable! Add a remove callback for all Realtek/Maxim SoundWire codecs and remove this warning. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20220606203752.144159-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/max98373-sdw.c | 12 +++++++++++- sound/soc/codecs/rt1308-sdw.c | 11 +++++++++++ sound/soc/codecs/rt1316-sdw.c | 11 +++++++++++ sound/soc/codecs/rt5682-sdw.c | 5 ++++- sound/soc/codecs/rt700-sdw.c | 6 +++++- sound/soc/codecs/rt711-sdca-sdw.c | 6 +++++- sound/soc/codecs/rt711-sdw.c | 6 +++++- sound/soc/codecs/rt715-sdca-sdw.c | 12 ++++++++++++ sound/soc/codecs/rt715-sdw.c | 12 ++++++++++++ 9 files changed, 76 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/max98373-sdw.c b/sound/soc/codecs/max98373-sdw.c index dc520effc61cb8..12323d4b5bfaa9 100644 --- a/sound/soc/codecs/max98373-sdw.c +++ b/sound/soc/codecs/max98373-sdw.c @@ -862,6 +862,16 @@ static int max98373_sdw_probe(struct sdw_slave *slave, return max98373_init(slave, regmap); } +static int max98373_sdw_remove(struct sdw_slave *slave) +{ + struct max98373_priv *max98373 = dev_get_drvdata(&slave->dev); + + if (max98373->first_hw_init) + pm_runtime_disable(&slave->dev); + + return 0; +} + #if defined(CONFIG_OF) static const struct of_device_id max98373_of_match[] = { { .compatible = "maxim,max98373", }, @@ -893,7 +903,7 @@ static struct sdw_driver max98373_sdw_driver = { .pm = &max98373_pm, }, .probe = max98373_sdw_probe, - .remove = NULL, + .remove = max98373_sdw_remove, .ops = &max98373_slave_ops, .id_table = max98373_id, }; diff --git a/sound/soc/codecs/rt1308-sdw.c b/sound/soc/codecs/rt1308-sdw.c index f716668de6400e..8472d855c325eb 100644 --- a/sound/soc/codecs/rt1308-sdw.c +++ b/sound/soc/codecs/rt1308-sdw.c @@ -683,6 +683,16 @@ static int rt1308_sdw_probe(struct sdw_slave *slave, return 0; } +static int rt1308_sdw_remove(struct sdw_slave *slave) +{ + struct rt1308_sdw_priv *rt1308 = dev_get_drvdata(&slave->dev); + + if (rt1308->first_hw_init) + pm_runtime_disable(&slave->dev); + + return 0; +} + static const struct sdw_device_id rt1308_id[] = { SDW_SLAVE_ENTRY_EXT(0x025d, 0x1308, 0x2, 0, 0), {}, @@ -742,6 +752,7 @@ static struct sdw_driver rt1308_sdw_driver = { .pm = &rt1308_pm, }, .probe = rt1308_sdw_probe, + .remove = rt1308_sdw_remove, .ops = &rt1308_slave_ops, .id_table = rt1308_id, }; diff --git a/sound/soc/codecs/rt1316-sdw.c b/sound/soc/codecs/rt1316-sdw.c index 09b4914bba1bfe..09cf3ca86fa4a0 100644 --- a/sound/soc/codecs/rt1316-sdw.c +++ b/sound/soc/codecs/rt1316-sdw.c @@ -675,6 +675,16 @@ static int rt1316_sdw_probe(struct sdw_slave *slave, return rt1316_sdw_init(&slave->dev, regmap, slave); } +static int rt1316_sdw_remove(struct sdw_slave *slave) +{ + struct rt1316_sdw_priv *rt1316 = dev_get_drvdata(&slave->dev); + + if (rt1316->first_hw_init) + pm_runtime_disable(&slave->dev); + + return 0; +} + static const struct sdw_device_id rt1316_id[] = { SDW_SLAVE_ENTRY_EXT(0x025d, 0x1316, 0x3, 0x1, 0), {}, @@ -734,6 +744,7 @@ static struct sdw_driver rt1316_sdw_driver = { .pm = &rt1316_pm, }, .probe = rt1316_sdw_probe, + .remove = rt1316_sdw_remove, .ops = &rt1316_slave_ops, .id_table = rt1316_id, }; diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c index 31a4f286043e46..a030c9987b9201 100644 --- a/sound/soc/codecs/rt5682-sdw.c +++ b/sound/soc/codecs/rt5682-sdw.c @@ -719,9 +719,12 @@ static int rt5682_sdw_remove(struct sdw_slave *slave) { struct rt5682_priv *rt5682 = dev_get_drvdata(&slave->dev); - if (rt5682 && rt5682->hw_init) + if (rt5682->hw_init) cancel_delayed_work_sync(&rt5682->jack_detect_work); + if (rt5682->first_hw_init) + pm_runtime_disable(&slave->dev); + return 0; } diff --git a/sound/soc/codecs/rt700-sdw.c b/sound/soc/codecs/rt700-sdw.c index bda59489966429..f7439e40ca8b54 100644 --- a/sound/soc/codecs/rt700-sdw.c +++ b/sound/soc/codecs/rt700-sdw.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "rt700.h" @@ -463,11 +464,14 @@ static int rt700_sdw_remove(struct sdw_slave *slave) { struct rt700_priv *rt700 = dev_get_drvdata(&slave->dev); - if (rt700 && rt700->hw_init) { + if (rt700->hw_init) { cancel_delayed_work_sync(&rt700->jack_detect_work); cancel_delayed_work_sync(&rt700->jack_btn_check_work); } + if (rt700->first_hw_init) + pm_runtime_disable(&slave->dev); + return 0; } diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c index aaf5af153d3fea..c722a2b0041f72 100644 --- a/sound/soc/codecs/rt711-sdca-sdw.c +++ b/sound/soc/codecs/rt711-sdca-sdw.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "rt711-sdca.h" #include "rt711-sdca-sdw.h" @@ -364,11 +365,14 @@ static int rt711_sdca_sdw_remove(struct sdw_slave *slave) { struct rt711_sdca_priv *rt711 = dev_get_drvdata(&slave->dev); - if (rt711 && rt711->hw_init) { + if (rt711->hw_init) { cancel_delayed_work_sync(&rt711->jack_detect_work); cancel_delayed_work_sync(&rt711->jack_btn_check_work); } + if (rt711->first_hw_init) + pm_runtime_disable(&slave->dev); + return 0; } diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c index bda2cc9439c987..f49c94baa37c6e 100644 --- a/sound/soc/codecs/rt711-sdw.c +++ b/sound/soc/codecs/rt711-sdw.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "rt711.h" @@ -464,12 +465,15 @@ static int rt711_sdw_remove(struct sdw_slave *slave) { struct rt711_priv *rt711 = dev_get_drvdata(&slave->dev); - if (rt711 && rt711->hw_init) { + if (rt711->hw_init) { cancel_delayed_work_sync(&rt711->jack_detect_work); cancel_delayed_work_sync(&rt711->jack_btn_check_work); cancel_work_sync(&rt711->calibration_work); } + if (rt711->first_hw_init) + pm_runtime_disable(&slave->dev); + return 0; } diff --git a/sound/soc/codecs/rt715-sdca-sdw.c b/sound/soc/codecs/rt715-sdca-sdw.c index a5c673f43d8242..0f4354eafef25d 100644 --- a/sound/soc/codecs/rt715-sdca-sdw.c +++ b/sound/soc/codecs/rt715-sdca-sdw.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "rt715-sdca.h" @@ -195,6 +196,16 @@ static int rt715_sdca_sdw_probe(struct sdw_slave *slave, return rt715_sdca_init(&slave->dev, mbq_regmap, regmap, slave); } +static int rt715_sdca_sdw_remove(struct sdw_slave *slave) +{ + struct rt715_sdca_priv *rt715 = dev_get_drvdata(&slave->dev); + + if (rt715->first_hw_init) + pm_runtime_disable(&slave->dev); + + return 0; +} + static const struct sdw_device_id rt715_sdca_id[] = { SDW_SLAVE_ENTRY_EXT(0x025d, 0x715, 0x3, 0x1, 0), SDW_SLAVE_ENTRY_EXT(0x025d, 0x714, 0x3, 0x1, 0), @@ -269,6 +280,7 @@ static struct sdw_driver rt715_sdw_driver = { .pm = &rt715_pm, }, .probe = rt715_sdca_sdw_probe, + .remove = rt715_sdca_sdw_remove, .ops = &rt715_sdca_slave_ops, .id_table = rt715_sdca_id, }; diff --git a/sound/soc/codecs/rt715-sdw.c b/sound/soc/codecs/rt715-sdw.c index a7b21b03c08bb7..b047bf87a100c5 100644 --- a/sound/soc/codecs/rt715-sdw.c +++ b/sound/soc/codecs/rt715-sdw.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -514,6 +515,16 @@ static int rt715_sdw_probe(struct sdw_slave *slave, return 0; } +static int rt715_sdw_remove(struct sdw_slave *slave) +{ + struct rt715_priv *rt715 = dev_get_drvdata(&slave->dev); + + if (rt715->first_hw_init) + pm_runtime_disable(&slave->dev); + + return 0; +} + static const struct sdw_device_id rt715_id[] = { SDW_SLAVE_ENTRY_EXT(0x025d, 0x714, 0x2, 0, 0), SDW_SLAVE_ENTRY_EXT(0x025d, 0x715, 0x2, 0, 0), @@ -575,6 +586,7 @@ static struct sdw_driver rt715_sdw_driver = { .pm = &rt715_pm, }, .probe = rt715_sdw_probe, + .remove = rt715_sdw_remove, .ops = &rt715_slave_ops, .id_table = rt715_id, }; From 42664a97f2b59de7a3b5e27c603f903e32ae414b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 6 Jun 2022 15:37:47 -0500 Subject: [PATCH 0627/1056] ASoC: rt711-sdca-sdw: fix calibrate mutex initialization [ Upstream commit ed0a7fb29c9fd4f53eeb37d1fe2354df7a038047 ] In codec driver bind/unbind test, the following warning is thrown: DEBUG_LOCKS_WARN_ON(lock->magic != lock) ... [ 699.182495] rt711_sdca_jack_init+0x1b/0x1d0 [snd_soc_rt711_sdca] [ 699.182498] rt711_sdca_set_jack_detect+0x3b/0x90 [snd_soc_rt711_sdca] [ 699.182500] snd_soc_component_set_jack+0x24/0x50 [snd_soc_core] A quick check in the code shows that the 'calibrate_mutex' used by this driver are not initialized at probe time. Moving the initialization to the probe removes the issue. BugLink: https://github.com/thesofproject/linux/issues/3644 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20220606203752.144159-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt711-sdca-sdw.c | 3 +++ sound/soc/codecs/rt711-sdca.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c index c722a2b0041f72..a085b2f530aa1d 100644 --- a/sound/soc/codecs/rt711-sdca-sdw.c +++ b/sound/soc/codecs/rt711-sdca-sdw.c @@ -373,6 +373,9 @@ static int rt711_sdca_sdw_remove(struct sdw_slave *slave) if (rt711->first_hw_init) pm_runtime_disable(&slave->dev); + mutex_destroy(&rt711->calibrate_mutex); + mutex_destroy(&rt711->disable_irq_lock); + return 0; } diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c index c15fb98eac86d3..66555cb5d1e4ee 100644 --- a/sound/soc/codecs/rt711-sdca.c +++ b/sound/soc/codecs/rt711-sdca.c @@ -1414,6 +1414,7 @@ int rt711_sdca_init(struct device *dev, struct regmap *regmap, rt711->regmap = regmap; rt711->mbq_regmap = mbq_regmap; + mutex_init(&rt711->calibrate_mutex); mutex_init(&rt711->disable_irq_lock); /* @@ -1552,7 +1553,6 @@ int rt711_sdca_io_init(struct device *dev, struct sdw_slave *slave) rt711_sdca_jack_detect_handler); INIT_DELAYED_WORK(&rt711->jack_btn_check_work, rt711_sdca_btn_check_handler); - mutex_init(&rt711->calibrate_mutex); } /* calibration */ From f2556ce6b35ae0fc72000a4daa21ded12665e2f2 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 6 Jun 2022 15:37:48 -0500 Subject: [PATCH 0628/1056] ASoC: Intel: sof_sdw: handle errors on card registration [ Upstream commit fe154c4ff376bc31041c6441958a08243df09c99 ] If the card registration fails, typically because of deferred probes, the device properties added for headset codecs are not removed, which leads to kernel oopses in driver bind/unbind tests. We already clean-up the device properties when the card is removed, this code can be moved as a helper and called upon card registration errors. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20220606203752.144159-4-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/sof_sdw.c | 51 ++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 0bf3e56e1d580a..abe39a0ef14b08 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -1323,6 +1323,33 @@ static struct snd_soc_card card_sof_sdw = { .late_probe = sof_sdw_card_late_probe, }; +static void mc_dailink_exit_loop(struct snd_soc_card *card) +{ + struct snd_soc_dai_link *link; + int ret; + int i, j; + + for (i = 0; i < ARRAY_SIZE(codec_info_list); i++) { + if (!codec_info_list[i].exit) + continue; + /* + * We don't need to call .exit function if there is no matched + * dai link found. + */ + for_each_card_prelinks(card, j, link) { + if (!strcmp(link->codecs[0].dai_name, + codec_info_list[i].dai_name)) { + ret = codec_info_list[i].exit(card, link); + if (ret) + dev_warn(card->dev, + "codec exit failed %d\n", + ret); + break; + } + } + } +} + static int mc_probe(struct platform_device *pdev) { struct snd_soc_card *card = &card_sof_sdw; @@ -1387,6 +1414,7 @@ static int mc_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(&pdev->dev, card); if (ret) { dev_err(card->dev, "snd_soc_register_card failed %d\n", ret); + mc_dailink_exit_loop(card); return ret; } @@ -1398,29 +1426,8 @@ static int mc_probe(struct platform_device *pdev) static int mc_remove(struct platform_device *pdev) { struct snd_soc_card *card = platform_get_drvdata(pdev); - struct snd_soc_dai_link *link; - int ret; - int i, j; - for (i = 0; i < ARRAY_SIZE(codec_info_list); i++) { - if (!codec_info_list[i].exit) - continue; - /* - * We don't need to call .exit function if there is no matched - * dai link found. - */ - for_each_card_prelinks(card, j, link) { - if (!strcmp(link->codecs[0].dai_name, - codec_info_list[i].dai_name)) { - ret = codec_info_list[i].exit(card, link); - if (ret) - dev_warn(&pdev->dev, - "codec exit failed %d\n", - ret); - break; - } - } - } + mc_dailink_exit_loop(card); return 0; } From b054614dd3fd21479d8582aca071015187393b81 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 6 Jun 2022 15:37:49 -0500 Subject: [PATCH 0629/1056] ASoC: rt711: fix calibrate mutex initialization [ Upstream commit 08bb5dc6ce02374169213cea772b1c297eaf32d5 ] Follow the same flow as rt711-sdca and initialize all mutexes at probe time. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20220606203752.144159-5-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt711-sdw.c | 3 +++ sound/soc/codecs/rt711.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c index f49c94baa37c6e..4fe68bcf2a7c22 100644 --- a/sound/soc/codecs/rt711-sdw.c +++ b/sound/soc/codecs/rt711-sdw.c @@ -474,6 +474,9 @@ static int rt711_sdw_remove(struct sdw_slave *slave) if (rt711->first_hw_init) pm_runtime_disable(&slave->dev); + mutex_destroy(&rt711->calibrate_mutex); + mutex_destroy(&rt711->disable_irq_lock); + return 0; } diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c index fafb0ba8349fd6..6e9b532a62718b 100644 --- a/sound/soc/codecs/rt711.c +++ b/sound/soc/codecs/rt711.c @@ -1199,6 +1199,7 @@ int rt711_init(struct device *dev, struct regmap *sdw_regmap, rt711->sdw_regmap = sdw_regmap; rt711->regmap = regmap; + mutex_init(&rt711->calibrate_mutex); mutex_init(&rt711->disable_irq_lock); /* @@ -1313,7 +1314,6 @@ int rt711_io_init(struct device *dev, struct sdw_slave *slave) rt711_jack_detect_handler); INIT_DELAYED_WORK(&rt711->jack_btn_check_work, rt711_btn_check_handler); - mutex_init(&rt711->calibrate_mutex); INIT_WORK(&rt711->calibration_work, rt711_calibration_work); schedule_work(&rt711->calibration_work); } From 07a606e1389a63b61cb8cd591026f30529117573 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 6 Jun 2022 15:37:50 -0500 Subject: [PATCH 0630/1056] ASoC: rt7*-sdw: harden jack_detect_handler [ Upstream commit 0484271ab0ce50649329fa9dc23c50853c5b26a4 ] Realtek headset codec drivers typically check if the card is instantiated before proceeding with the jack detection. The rt700, rt711 and rt711-sdca are however missing a check on the card pointer, which can lead to NULL dereferences encountered in driver bind/unbind tests. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20220606203752.144159-6-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt700.c | 2 +- sound/soc/codecs/rt711-sdca.c | 2 +- sound/soc/codecs/rt711.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/rt700.c b/sound/soc/codecs/rt700.c index 614a40247679b2..c70fe8b06e6dad 100644 --- a/sound/soc/codecs/rt700.c +++ b/sound/soc/codecs/rt700.c @@ -162,7 +162,7 @@ static void rt700_jack_detect_handler(struct work_struct *work) if (!rt700->hs_jack) return; - if (!rt700->component->card->instantiated) + if (!rt700->component->card || !rt700->component->card->instantiated) return; reg = RT700_VERB_GET_PIN_SENSE | RT700_HP_OUT; diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c index 66555cb5d1e4ee..2950cde029f74d 100644 --- a/sound/soc/codecs/rt711-sdca.c +++ b/sound/soc/codecs/rt711-sdca.c @@ -294,7 +294,7 @@ static void rt711_sdca_jack_detect_handler(struct work_struct *work) if (!rt711->hs_jack) return; - if (!rt711->component->card->instantiated) + if (!rt711->component->card || !rt711->component->card->instantiated) return; /* SDW_SCP_SDCA_INT_SDCA_0 is used for jack detection */ diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c index 6e9b532a62718b..344bfbc5683d6e 100644 --- a/sound/soc/codecs/rt711.c +++ b/sound/soc/codecs/rt711.c @@ -242,7 +242,7 @@ static void rt711_jack_detect_handler(struct work_struct *work) if (!rt711->hs_jack) return; - if (!rt711->component->card->instantiated) + if (!rt711->component->card || !rt711->component->card->instantiated) return; reg = RT711_VERB_GET_PIN_SENSE | RT711_HP_OUT; From e71d0e1370b162271a0053e1f1215961c31dc4cb Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 6 Jun 2022 15:37:51 -0500 Subject: [PATCH 0631/1056] ASoC: codecs: rt700/rt711/rt711-sdca: initialize workqueues in probe [ Upstream commit ba98d7d8b60ba410aa03834f6aa48fd3b2e68478 ] The workqueues are initialized in the io_init functions, which isn't quite right. In some tests, this leads to warnings throw from __queue_delayed_work() WARN_ON_FUNCTION_MISMATCH(timer->function, delayed_work_timer_fn); Move all the initializations to the probe functions. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20220606203752.144159-7-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt700.c | 12 +++++------- sound/soc/codecs/rt711-sdca.c | 10 +++------- sound/soc/codecs/rt711.c | 12 +++++------- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/sound/soc/codecs/rt700.c b/sound/soc/codecs/rt700.c index c70fe8b06e6dad..e049d672ccfd0a 100644 --- a/sound/soc/codecs/rt700.c +++ b/sound/soc/codecs/rt700.c @@ -1124,6 +1124,11 @@ int rt700_init(struct device *dev, struct regmap *sdw_regmap, mutex_init(&rt700->disable_irq_lock); + INIT_DELAYED_WORK(&rt700->jack_detect_work, + rt700_jack_detect_handler); + INIT_DELAYED_WORK(&rt700->jack_btn_check_work, + rt700_btn_check_handler); + /* * Mark hw_init to false * HW init will be performed when device reports present @@ -1218,13 +1223,6 @@ int rt700_io_init(struct device *dev, struct sdw_slave *slave) /* Finish Initial Settings, set power to D3 */ regmap_write(rt700->regmap, RT700_SET_AUDIO_POWER_STATE, AC_PWRST_D3); - if (!rt700->first_hw_init) { - INIT_DELAYED_WORK(&rt700->jack_detect_work, - rt700_jack_detect_handler); - INIT_DELAYED_WORK(&rt700->jack_btn_check_work, - rt700_btn_check_handler); - } - /* * if set_jack callback occurred early than io_init, * we set up the jack detection function now diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c index 2950cde029f74d..60aef52b3fe4a7 100644 --- a/sound/soc/codecs/rt711-sdca.c +++ b/sound/soc/codecs/rt711-sdca.c @@ -1417,6 +1417,9 @@ int rt711_sdca_init(struct device *dev, struct regmap *regmap, mutex_init(&rt711->calibrate_mutex); mutex_init(&rt711->disable_irq_lock); + INIT_DELAYED_WORK(&rt711->jack_detect_work, rt711_sdca_jack_detect_handler); + INIT_DELAYED_WORK(&rt711->jack_btn_check_work, rt711_sdca_btn_check_handler); + /* * Mark hw_init to false * HW init will be performed when device reports present @@ -1548,13 +1551,6 @@ int rt711_sdca_io_init(struct device *dev, struct sdw_slave *slave) rt711_sdca_index_update_bits(rt711, RT711_VENDOR_HDA_CTL, RT711_PUSH_BTN_INT_CTL0, 0x20, 0x00); - if (!rt711->first_hw_init) { - INIT_DELAYED_WORK(&rt711->jack_detect_work, - rt711_sdca_jack_detect_handler); - INIT_DELAYED_WORK(&rt711->jack_btn_check_work, - rt711_sdca_btn_check_handler); - } - /* calibration */ ret = rt711_sdca_calibration(rt711); if (ret < 0) diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c index 344bfbc5683d6e..51a98e730fc8fd 100644 --- a/sound/soc/codecs/rt711.c +++ b/sound/soc/codecs/rt711.c @@ -1202,6 +1202,10 @@ int rt711_init(struct device *dev, struct regmap *sdw_regmap, mutex_init(&rt711->calibrate_mutex); mutex_init(&rt711->disable_irq_lock); + INIT_DELAYED_WORK(&rt711->jack_detect_work, rt711_jack_detect_handler); + INIT_DELAYED_WORK(&rt711->jack_btn_check_work, rt711_btn_check_handler); + INIT_WORK(&rt711->calibration_work, rt711_calibration_work); + /* * Mark hw_init to false * HW init will be performed when device reports present @@ -1309,14 +1313,8 @@ int rt711_io_init(struct device *dev, struct sdw_slave *slave) if (rt711->first_hw_init) rt711_calibration(rt711); - else { - INIT_DELAYED_WORK(&rt711->jack_detect_work, - rt711_jack_detect_handler); - INIT_DELAYED_WORK(&rt711->jack_btn_check_work, - rt711_btn_check_handler); - INIT_WORK(&rt711->calibration_work, rt711_calibration_work); + else schedule_work(&rt711->calibration_work); - } /* * if set_jack callback occurred early than io_init, From 05708fb473ce7f2af1082c39ec91077a5a404cf4 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 9 Jun 2022 11:59:49 +0300 Subject: [PATCH 0632/1056] ASoC: SOF: Intel: hda-loader: Clarify the cl_dsp_init() flow [ Upstream commit bbfef046c6613404c01aeb9e9928bebb78dd327a ] Update the comment for the cl_dsp_init() to clarify what is done by the function and use the chip->init_core_mask instead of BIT(0) when unstalling/running the init core. Complements: 2a68ff846164 ("ASoC: SOF: Intel: hda: Revisit IMR boot sequence") Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20220609085949.29062-4-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sof/intel/hda-loader.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c index 14469e087b00d3..ee09393d42cb07 100644 --- a/sound/soc/sof/intel/hda-loader.c +++ b/sound/soc/sof/intel/hda-loader.c @@ -80,9 +80,9 @@ static struct hdac_ext_stream *cl_stream_prepare(struct snd_sof_dev *sdev, unsig } /* - * first boot sequence has some extra steps. core 0 waits for power - * status on core 1, so power up core 1 also momentarily, keep it in - * reset/stall and then turn it off + * first boot sequence has some extra steps. + * power on all host managed cores and only unstall/run the boot core to boot the + * DSP then turn off all non boot cores (if any) is powered on. */ static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag) { @@ -117,7 +117,7 @@ static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag) ((stream_tag - 1) << 9))); /* step 3: unset core 0 reset state & unstall/run core 0 */ - ret = hda_dsp_core_run(sdev, BIT(0)); + ret = hda_dsp_core_run(sdev, chip->init_core_mask); if (ret < 0) { if (hda->boot_iteration == HDA_FW_BOOT_ATTEMPTS) dev_err(sdev->dev, From dd70da2a5816821ab4bd22313ad8d22cfa16aeba Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 3 Jun 2022 14:25:26 +0200 Subject: [PATCH 0633/1056] ASoC: wcd938x: Fix event generation for some controls [ Upstream commit 10e7ff0047921e32b919ecee7be706dd33c107f8 ] Currently wcd938x_*_put() unconditionally report that the value of the control changed, resulting in spurious events being generated. Return 0 in that case instead as we should. There is still an issue in the compander control which is a bit more complex. Signed-off-by: Mark Brown Reported-by: kernel test robot Link: https://lore.kernel.org/r/20220603122526.3914942-1-broonie@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wcd938x.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 4480c118ed5d95..8cdc45e669f2d3 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -2517,6 +2517,9 @@ static int wcd938x_tx_mode_put(struct snd_kcontrol *kcontrol, struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; int path = e->shift_l; + if (wcd938x->tx_mode[path] == ucontrol->value.enumerated.item[0]) + return 0; + wcd938x->tx_mode[path] = ucontrol->value.enumerated.item[0]; return 1; @@ -2539,6 +2542,9 @@ static int wcd938x_rx_hph_mode_put(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component); + if (wcd938x->hph_mode == ucontrol->value.enumerated.item[0]) + return 0; + wcd938x->hph_mode = ucontrol->value.enumerated.item[0]; return 1; @@ -2630,6 +2636,9 @@ static int wcd938x_ldoh_put(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component); + if (wcd938x->ldoh == ucontrol->value.integer.value[0]) + return 0; + wcd938x->ldoh = ucontrol->value.integer.value[0]; return 1; @@ -2652,6 +2661,9 @@ static int wcd938x_bcs_put(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component); + if (wcd938x->bcs_dis == ucontrol->value.integer.value[0]) + return 0; + wcd938x->bcs_dis = ucontrol->value.integer.value[0]; return 1; From bc90670626006f172e1ca0ee241c5d9b0a06d051 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 12 Jun 2022 17:56:52 +0200 Subject: [PATCH 0634/1056] ASoC: Intel: bytcr_wm5102: Fix GPIO related probe-ordering problem [ Upstream commit 4e07479eab8a044cc9542414ccb4aeb8eb033bde ] The "wlf,spkvdd-ena" GPIO needed by the bytcr_wm5102 driver is made available through a gpio-lookup table. This gpio-lookup table is registered by drivers/mfd/arizona-spi.c, which may get probed after the bytcr_wm5102 driver. If the gpio-lookup table has not registered yet then the gpiod_get() will return -ENOENT. Treat -ENOENT as -EPROBE_DEFER to still keep things working in this case. Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20220612155652.107310-1-hdegoede@redhat.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/bytcr_wm5102.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/boards/bytcr_wm5102.c b/sound/soc/intel/boards/bytcr_wm5102.c index 580d5fddae5ada..bb669d58eb8b3c 100644 --- a/sound/soc/intel/boards/bytcr_wm5102.c +++ b/sound/soc/intel/boards/bytcr_wm5102.c @@ -421,8 +421,17 @@ static int snd_byt_wm5102_mc_probe(struct platform_device *pdev) priv->spkvdd_en_gpio = gpiod_get(codec_dev, "wlf,spkvdd-ena", GPIOD_OUT_LOW); put_device(codec_dev); - if (IS_ERR(priv->spkvdd_en_gpio)) - return dev_err_probe(dev, PTR_ERR(priv->spkvdd_en_gpio), "getting spkvdd-GPIO\n"); + if (IS_ERR(priv->spkvdd_en_gpio)) { + ret = PTR_ERR(priv->spkvdd_en_gpio); + /* + * The spkvdd gpio-lookup is registered by: drivers/mfd/arizona-spi.c, + * so -ENOENT means that arizona-spi hasn't probed yet. + */ + if (ret == -ENOENT) + ret = -EPROBE_DEFER; + + return dev_err_probe(dev, ret, "getting spkvdd-GPIO\n"); + } /* override platform name, if required */ byt_wm5102_card.dev = dev; From 44975751bef02e8ec25f4770c8f1c01f92ce892b Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 21 Jun 2022 11:20:39 +0100 Subject: [PATCH 0635/1056] ASoC: wm5110: Fix DRE control [ Upstream commit 0bc0ae9a5938d512fd5d44f11c9c04892dcf4961 ] The DRE controls on wm5110 should return a value of 1 if the DRE state is actually changed, update to fix this. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220621102041.1713504-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm5110.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index 5c2d45d05c975c..7c6e01720d6517 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -413,6 +413,7 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol, unsigned int rnew = (!!ucontrol->value.integer.value[1]) << mc->rshift; unsigned int lold, rold; unsigned int lena, rena; + bool change = false; int ret; snd_soc_dapm_mutex_lock(dapm); @@ -440,8 +441,8 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol, goto err; } - ret = regmap_update_bits(arizona->regmap, ARIZONA_DRE_ENABLE, - mask, lnew | rnew); + ret = regmap_update_bits_check(arizona->regmap, ARIZONA_DRE_ENABLE, + mask, lnew | rnew, &change); if (ret) { dev_err(arizona->dev, "Failed to set DRE: %d\n", ret); goto err; @@ -454,6 +455,9 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol, if (!rnew && rold) wm5110_clear_pga_volume(arizona, mc->rshift); + if (change) + ret = 1; + err: snd_soc_dapm_mutex_unlock(dapm); From 269be8b2907378adf72d7347cfa43ef230351a06 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Tue, 21 Jun 2022 17:07:19 +0800 Subject: [PATCH 0636/1056] ASoC: rt711-sdca: fix kernel NULL pointer dereference when IO error [ Upstream commit 1df793d479bef546569fc2e409ff8bb3f0fb8e99 ] The initial settings will be written before the codec probe function. But, the rt711->component doesn't be assigned yet. If IO error happened during initial settings operations, it will cause the kernel panic. This patch changed component->dev to slave->dev to fix this issue. Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20220621090719.30558-1-shumingf@realtek.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt711-sdca.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c index 60aef52b3fe4a7..3b5df3ea2f602e 100644 --- a/sound/soc/codecs/rt711-sdca.c +++ b/sound/soc/codecs/rt711-sdca.c @@ -34,7 +34,7 @@ static int rt711_sdca_index_write(struct rt711_sdca_priv *rt711, ret = regmap_write(regmap, addr, value); if (ret < 0) - dev_err(rt711->component->dev, + dev_err(&rt711->slave->dev, "Failed to set private value: %06x <= %04x ret=%d\n", addr, value, ret); @@ -50,7 +50,7 @@ static int rt711_sdca_index_read(struct rt711_sdca_priv *rt711, ret = regmap_read(regmap, addr, value); if (ret < 0) - dev_err(rt711->component->dev, + dev_err(&rt711->slave->dev, "Failed to get private value: %06x => %04x ret=%d\n", addr, *value, ret); From ab2f5e5f3ce12f74b095de8cfee7c680e5a981b3 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Jun 2022 11:51:15 +0100 Subject: [PATCH 0637/1056] ASoC: dapm: Initialise kcontrol data for mux/demux controls [ Upstream commit 11d7a12f7f50baa5af9090b131c9b03af59503e7 ] DAPM keeps a copy of the current value of mux/demux controls, however this value is only initialised in the case of autodisable controls. This leads to false notification events when first modifying a DAPM kcontrol that has a non-zero default. Autodisable controls are left as they are, since they already initialise the value, and there would be more work required to support autodisable muxes where the first option isn't disabled and/or that isn't the default. Technically this issue could affect mixer/switch elements as well, although not on any of the devices I am currently running. There is also a little more work to do to address the issue there due to that side supporting stereo controls, so that has not been tackled in this patch. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220623105120.1981154-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-dapm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 47b85ba5b7d6d7..b957049bae3371 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -62,6 +62,8 @@ struct snd_soc_dapm_widget * snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm, const struct snd_soc_dapm_widget *widget); +static unsigned int soc_dapm_read(struct snd_soc_dapm_context *dapm, int reg); + /* dapm power sequences - make this per codec in the future */ static int dapm_up_seq[] = { [snd_soc_dapm_pre] = 1, @@ -442,6 +444,9 @@ static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget, snd_soc_dapm_add_path(widget->dapm, data->widget, widget, NULL, NULL); + } else if (e->reg != SND_SOC_NOPM) { + data->value = soc_dapm_read(widget->dapm, e->reg) & + (e->mask << e->shift_l); } break; default: From 74ead64cbf0d3cd22c2e010bdb1d0c19abf66ca9 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Jun 2022 11:51:17 +0100 Subject: [PATCH 0638/1056] ASoC: cs47l15: Fix event generation for low power mux control [ Upstream commit 7f103af4a10f375b9b346b4d0b730f6a66b8c451 ] cs47l15_in1_adc_put always returns zero regardless of if the control value was updated. This results in missing notifications to user-space of the control change. Update the handling to return 1 when the value is changed. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220623105120.1981154-3-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/cs47l15.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs47l15.c b/sound/soc/codecs/cs47l15.c index 1ee83160b83fb7..ac9ccdea15b58f 100644 --- a/sound/soc/codecs/cs47l15.c +++ b/sound/soc/codecs/cs47l15.c @@ -122,6 +122,9 @@ static int cs47l15_in1_adc_put(struct snd_kcontrol *kcontrol, snd_soc_kcontrol_component(kcontrol); struct cs47l15 *cs47l15 = snd_soc_component_get_drvdata(component); + if (!!ucontrol->value.integer.value[0] == cs47l15->in1_lp_mode) + return 0; + switch (ucontrol->value.integer.value[0]) { case 0: /* Set IN1 to normal mode */ @@ -150,7 +153,7 @@ static int cs47l15_in1_adc_put(struct snd_kcontrol *kcontrol, break; } - return 0; + return 1; } static const struct snd_kcontrol_new cs47l15_snd_controls[] = { From 66cc34f2e493b55fe3455848f275f8ee91a04ac8 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Jun 2022 11:51:18 +0100 Subject: [PATCH 0639/1056] ASoC: madera: Fix event generation for OUT1 demux [ Upstream commit e3cabbef3db8269207a6b8808f510137669f8deb ] madera_out1_demux_put returns the value of snd_soc_dapm_mux_update_power, which returns a 1 if a path was found for the kcontrol. This is obviously different to the expected return a 1 if the control was updated value. This results in spurious notifications to user-space. Update the handling to only return a 1 when the value is changed. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220623105120.1981154-4-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/madera.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/madera.c b/sound/soc/codecs/madera.c index f4ed7e04673fd3..d3e7a591b5a800 100644 --- a/sound/soc/codecs/madera.c +++ b/sound/soc/codecs/madera.c @@ -618,7 +618,13 @@ int madera_out1_demux_put(struct snd_kcontrol *kcontrol, end: snd_soc_dapm_mutex_unlock(dapm); - return snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); + ret = snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); + if (ret < 0) { + dev_err(madera->dev, "Failed to update demux power state: %d\n", ret); + return ret; + } + + return change; } EXPORT_SYMBOL_GPL(madera_out1_demux_put); From 9c9869c308a311ba7b5a48e6cdb3df22d63d088a Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Jun 2022 11:51:19 +0100 Subject: [PATCH 0640/1056] ASoC: madera: Fix event generation for rate controls [ Upstream commit 980555e95f7cabdc9c80a07107622b097ba23703 ] madera_adsp_rate_put always returns zero regardless of if the control value was updated. This results in missing notifications to user-space of the control change. Update the handling to return 1 when the value is changed. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220623105120.1981154-5-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/madera.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/madera.c b/sound/soc/codecs/madera.c index d3e7a591b5a800..fd4fa1d5d2d12d 100644 --- a/sound/soc/codecs/madera.c +++ b/sound/soc/codecs/madera.c @@ -899,7 +899,7 @@ static int madera_adsp_rate_put(struct snd_kcontrol *kcontrol, struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; const int adsp_num = e->shift_l; const unsigned int item = ucontrol->value.enumerated.item[0]; - int ret; + int ret = 0; if (item >= e->items) return -EINVAL; @@ -916,10 +916,10 @@ static int madera_adsp_rate_put(struct snd_kcontrol *kcontrol, "Cannot change '%s' while in use by active audio paths\n", kcontrol->id.name); ret = -EBUSY; - } else { + } else if (priv->adsp_rate_cache[adsp_num] != e->values[item]) { /* Volatile register so defer until the codec is powered up */ priv->adsp_rate_cache[adsp_num] = e->values[item]; - ret = 0; + ret = 1; } mutex_unlock(&priv->rate_lock); From ee1da3d59674a1f6134fbe3dea7070e32f161083 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Wed, 15 Jun 2022 08:54:26 +0900 Subject: [PATCH 0641/1056] irqchip: or1k-pic: Undefine mask_ack for level triggered hardware [ Upstream commit 8520501346ed8d1c4a6dfa751cb57328a9c843f1 ] The mask_ack operation clears the interrupt by writing to the PICSR register. This we don't want for level triggered interrupt because it does not actually clear the interrupt on the source hardware. This was causing issues in qemu with multi core setups where interrupts would continue to fire even though they had been cleared in PICSR. Just remove the mask_ack operation. Acked-by: Marc Zyngier Signed-off-by: Stafford Horne Signed-off-by: Sasha Levin --- drivers/irqchip/irq-or1k-pic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/irqchip/irq-or1k-pic.c b/drivers/irqchip/irq-or1k-pic.c index 03d2366118dd4b..d5f1fabc45d79e 100644 --- a/drivers/irqchip/irq-or1k-pic.c +++ b/drivers/irqchip/irq-or1k-pic.c @@ -66,7 +66,6 @@ static struct or1k_pic_dev or1k_pic_level = { .name = "or1k-PIC-level", .irq_unmask = or1k_pic_unmask, .irq_mask = or1k_pic_mask, - .irq_mask_ack = or1k_pic_mask_ack, }, .handle = handle_level_irq, .flags = IRQ_LEVEL | IRQ_NOPROBE, From 26bb7afc027ce6ac8ab6747babec674d55689ff0 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 30 Jun 2022 09:14:40 +0200 Subject: [PATCH 0642/1056] x86: Clear .brk area at early boot [ Upstream commit 38fa5479b41376dc9d7f57e71c83514285a25ca0 ] The .brk section has the same properties as .bss: it is an alloc-only section and should be cleared before being used. Not doing so is especially a problem for Xen PV guests, as the hypervisor will validate page tables (check for writable page tables and hypervisor private bits) before accepting them to be used. Make sure .brk is initially zero by letting clear_bss() clear the brk area, too. Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220630071441.28576-3-jgross@suse.com Signed-off-by: Sasha Levin --- arch/x86/kernel/head64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index de01903c373554..5036104d547077 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -418,6 +418,8 @@ static void __init clear_bss(void) { memset(__bss_start, 0, (unsigned long) __bss_stop - (unsigned long) __bss_start); + memset(__brk_base, 0, + (unsigned long) __brk_limit - (unsigned long) __brk_base); } static unsigned long get_cmd_line_ptr(void) From c0cba036bfe8e29ea2f0e852eb9880e3fcca1071 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 26 Jun 2022 09:43:15 +0200 Subject: [PATCH 0643/1056] soc: ixp4xx/npe: Fix unused match warning [ Upstream commit 620f83b8326ce9706b1118334f0257ae028ce045 ] The kernel test robot found this inconsistency: drivers/soc/ixp4xx/ixp4xx-npe.c:737:34: warning: 'ixp4xx_npe_of_match' defined but not used [-Wunused-const-variable=] 737 | static const struct of_device_id ixp4xx_npe_of_match[] = { This is because the match is enclosed in the of_match_ptr() which compiles into NULL when OF is disabled and this is unnecessary. Fix it by dropping of_match_ptr() around the match. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220626074315.61209-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- drivers/soc/ixp4xx/ixp4xx-npe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c index f490c4ca51f51e..a0159805d061bb 100644 --- a/drivers/soc/ixp4xx/ixp4xx-npe.c +++ b/drivers/soc/ixp4xx/ixp4xx-npe.c @@ -743,7 +743,7 @@ static const struct of_device_id ixp4xx_npe_of_match[] = { static struct platform_driver ixp4xx_npe_driver = { .driver = { .name = "ixp4xx-npe", - .of_match_table = of_match_ptr(ixp4xx_npe_of_match), + .of_match_table = ixp4xx_npe_of_match, }, .probe = ixp4xx_npe_probe, .remove = ixp4xx_npe_remove, From bd87cf2ed609ff9e3985aa300dfe19e697922b59 Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Fri, 24 Jun 2022 11:27:13 +0200 Subject: [PATCH 0644/1056] ARM: dts: stm32: use the correct clock source for CEC on stm32mp151 [ Upstream commit 78ece8cce1ba0c3f3e5a7c6c1b914b3794f04c44 ] The peripheral clock of CEC is not LSE but CEC. Signed-off-by: Gabriel Fernandez Signed-off-by: Alexandre Torgue Signed-off-by: Sasha Levin --- arch/arm/boot/dts/stm32mp151.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stm32mp151.dtsi b/arch/arm/boot/dts/stm32mp151.dtsi index a9b65b3bfda58a..e0d48331879881 100644 --- a/arch/arm/boot/dts/stm32mp151.dtsi +++ b/arch/arm/boot/dts/stm32mp151.dtsi @@ -553,7 +553,7 @@ compatible = "st,stm32-cec"; reg = <0x40016000 0x400>; interrupts = ; - clocks = <&rcc CEC_K>, <&clk_lse>; + clocks = <&rcc CEC_K>, <&rcc CEC>; clock-names = "cec", "hdmi-cec"; status = "disabled"; }; From 2483ba7afa598ceebf5deb66c20ec3e730f3695b Mon Sep 17 00:00:00 2001 From: Srinivas Neeli Date: Thu, 9 Jun 2022 13:54:32 +0530 Subject: [PATCH 0645/1056] Revert "can: xilinx_can: Limit CANFD brp to 2" [ Upstream commit c6da4590fe819dfe28a4f8037a8dc1e056542fb4 ] This reverts commit 05ca14fdb6fe65614e0652d03e44b02748d25af7. On early silicon engineering samples observed bit shrinking issue when we use brp as 1. Hence updated brp_min as 2. As in production silicon this issue is fixed, so reverting the patch. Link: https://lore.kernel.org/all/20220609082433.1191060-2-srinivas.neeli@xilinx.com Signed-off-by: Srinivas Neeli Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/xilinx_can.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 262b783d1df8a2..a2e751f0ae0b15 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -259,7 +259,7 @@ static const struct can_bittiming_const xcan_bittiming_const_canfd2 = { .tseg2_min = 1, .tseg2_max = 128, .sjw_max = 128, - .brp_min = 2, + .brp_min = 1, .brp_max = 256, .brp_inc = 1, }; @@ -272,7 +272,7 @@ static const struct can_bittiming_const xcan_data_bittiming_const_canfd2 = { .tseg2_min = 1, .tseg2_max = 16, .sjw_max = 16, - .brp_min = 2, + .brp_min = 1, .brp_max = 256, .brp_inc = 1, }; From 9d90a21f0cb745d99c4bfce2793e063c3944b823 Mon Sep 17 00:00:00 2001 From: John Veness Date: Fri, 24 Jun 2022 15:07:57 +0100 Subject: [PATCH 0646/1056] ALSA: usb-audio: Add quirks for MacroSilicon MS2100/MS2106 devices [ Upstream commit 6e2c9105e0b743c92a157389d40f00b81bdd09fe ] Treat the claimed 96kHz 1ch in the descriptors as 48kHz 2ch, so that the audio stream doesn't sound mono. Also fix initial stream alignment, so that left and right channels are in the correct order. Signed-off-by: John Veness Link: https://lore.kernel.org/r/20220624140757.28758-1-john-linux@pelago.org.uk Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/quirks-table.h | 48 ++++++++++++++++++++++++++++++++++++++++ sound/usb/quirks.c | 3 +++ 2 files changed, 51 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 4f56e1784932a2..853da162fd18c3 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3802,6 +3802,54 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, +/* + * MacroSilicon MS2100/MS2106 based AV capture cards + * + * These claim 96kHz 1ch in the descriptors, but are actually 48kHz 2ch. + * They also need QUIRK_FLAG_ALIGN_TRANSFER, which makes one wonder if + * they pretend to be 96kHz mono as a workaround for stereo being broken + * by that... + * + * They also have an issue with initial stream alignment that causes the + * channels to be swapped and out of phase, which is dealt with in quirks.c. + */ +{ + USB_AUDIO_DEVICE(0x534d, 0x0021), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "MacroSilicon", + .product_name = "MS210x", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + { + .ifnum = 2, + .type = QUIRK_AUDIO_STANDARD_MIXER, + }, + { + .ifnum = 3, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels = 2, + .iface = 3, + .altsetting = 1, + .altset_idx = 1, + .attributes = 0, + .endpoint = 0x82, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC, + .rates = SNDRV_PCM_RATE_CONTINUOUS, + .rate_min = 48000, + .rate_max = 48000, + } + }, + { + .ifnum = -1 + } + } + } +}, + /* * MacroSilicon MS2109 based HDMI capture cards * diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 12ce69b04f634c..a7bcae0a2c75b4 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1478,6 +1478,7 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs, case USB_ID(0x041e, 0x3f19): /* E-Mu 0204 USB */ set_format_emu_quirk(subs, fmt); break; + case USB_ID(0x534d, 0x0021): /* MacroSilicon MS2100/MS2106 */ case USB_ID(0x534d, 0x2109): /* MacroSilicon MS2109 */ subs->stream_offset_adj = 2; break; @@ -1908,6 +1909,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x534d, 0x0021, /* MacroSilicon MS2100/MS2106 */ + QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x534d, 0x2109, /* MacroSilicon MS2109 */ QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ From df982f9d094dcc01ad31bf324837b7e690e8de92 Mon Sep 17 00:00:00 2001 From: Egor Vorontsov Date: Mon, 27 Jun 2022 13:00:34 +0300 Subject: [PATCH 0647/1056] ALSA: usb-audio: Add quirk for Fiero SC-01 [ Upstream commit 4fb7c24f69c48fdc02ea7858dbd5a60ff08bf7e5 ] Fiero SC-01 is a USB sound card with two mono inputs and a single stereo output. The inputs are composed into a single stereo stream. The device uses a vendor-provided driver on Windows and does not work at all without it. The driver mostly provides ASIO functionality, but also alters the way the sound card is queried for sample rates and clocks. ALSA queries those failing with an EPIPE (same as Windows 10 does). Presumably, the vendor-provided driver does not query it at all, simply matching by VID:PID. Thus, I consider this a buggy firmware and adhere to a set of fixed endpoint quirks instead. The soundcard has an internal clock. Implicit feedback mode is required for the playback. I have updated my device to v1.1.0 from a Windows 10 VM using a vendor- provided binary prior to the development, hoping for it to just begin working. The device provides no obvious way to downgrade the firmware, and regardless, there's no binary available for v1.0.0 anyway. Thus, I will be getting another unit to extend the patch with support for that. Expected to be a simple copy-paste of the existing one, though. There were no previous reports of that device in context of Linux anywhere. Other issues have been reported though, but that's out of the scope. Signed-off-by: Egor Vorontsov Link: https://lore.kernel.org/r/20220627100041.2861494-1-sdoregor@sdore.me Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/quirks-table.h | 68 ++++++++++++++++++++++++++++++++++++++++ sound/usb/quirks.c | 2 ++ 2 files changed, 70 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 853da162fd18c3..7067d314fecd43 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -4167,6 +4167,74 @@ YAMAHA_DEVICE(0x7010, "UB99"), } } }, +{ + /* + * Fiero SC-01 (firmware v1.1.0) + */ + USB_DEVICE(0x2b53, 0x0031), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "Fiero", + .product_name = "SC-01", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + /* Playback */ + { + .ifnum = 1, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S32_LE, + .channels = 2, + .fmt_bits = 24, + .iface = 1, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x01, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC, + .rates = SNDRV_PCM_RATE_48000 | + SNDRV_PCM_RATE_96000, + .rate_min = 48000, + .rate_max = 96000, + .nr_rates = 2, + .rate_table = (unsigned int[]) { 48000, 96000 }, + .clock = 0x29 + } + }, + /* Capture */ + { + .ifnum = 2, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S32_LE, + .channels = 2, + .fmt_bits = 24, + .iface = 2, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x82, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC | + USB_ENDPOINT_USAGE_IMPLICIT_FB, + .rates = SNDRV_PCM_RATE_48000 | + SNDRV_PCM_RATE_96000, + .rate_min = 48000, + .rate_max = 96000, + .nr_rates = 2, + .rate_table = (unsigned int[]) { 48000, 96000 }, + .clock = 0x29 + } + }, + { + .ifnum = -1 + } + } + } +}, #undef USB_DEVICE_VENDOR_SPEC #undef USB_AUDIO_DEVICE diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index a7bcae0a2c75b4..51138350f03c58 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1915,6 +1915,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), /* Vendor matches */ VENDOR_FLG(0x045e, /* MS Lifecam */ From 15ef4d686a585ac009ea91aa537f23777d8f34c7 Mon Sep 17 00:00:00 2001 From: Egor Vorontsov Date: Mon, 27 Jun 2022 13:00:35 +0300 Subject: [PATCH 0648/1056] ALSA: usb-audio: Add quirk for Fiero SC-01 (fw v1.0.0) [ Upstream commit 2307a0e1ca0b5c1337b37ac6302f96e017ebac3c ] The patch applies the same quirks used for SC-01 at firmware v1.1.0 to the ones running v1.0.0, with respect to hard-coded sample rates. I got two more units and successfully tested the patch series with both firmwares. The support is now complete (not accounting ASIO). Signed-off-by: Egor Vorontsov Link: https://lore.kernel.org/r/20220627100041.2861494-2-sdoregor@sdore.me Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/quirks-table.h | 132 +++++++++++++++++++++++++++++++++++++++ sound/usb/quirks.c | 4 ++ 2 files changed, 136 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 7067d314fecd43..f93201a830b5a7 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -4167,6 +4167,138 @@ YAMAHA_DEVICE(0x7010, "UB99"), } } }, +{ + /* + * Fiero SC-01 (firmware v1.0.0 @ 48 kHz) + */ + USB_DEVICE(0x2b53, 0x0023), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "Fiero", + .product_name = "SC-01", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + /* Playback */ + { + .ifnum = 1, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S32_LE, + .channels = 2, + .fmt_bits = 24, + .iface = 1, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x01, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC, + .rates = SNDRV_PCM_RATE_48000, + .rate_min = 48000, + .rate_max = 48000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 48000 }, + .clock = 0x29 + } + }, + /* Capture */ + { + .ifnum = 2, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S32_LE, + .channels = 2, + .fmt_bits = 24, + .iface = 2, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x82, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC | + USB_ENDPOINT_USAGE_IMPLICIT_FB, + .rates = SNDRV_PCM_RATE_48000, + .rate_min = 48000, + .rate_max = 48000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 48000 }, + .clock = 0x29 + } + }, + { + .ifnum = -1 + } + } + } +}, +{ + /* + * Fiero SC-01 (firmware v1.0.0 @ 96 kHz) + */ + USB_DEVICE(0x2b53, 0x0024), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "Fiero", + .product_name = "SC-01", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + /* Playback */ + { + .ifnum = 1, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S32_LE, + .channels = 2, + .fmt_bits = 24, + .iface = 1, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x01, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC, + .rates = SNDRV_PCM_RATE_96000, + .rate_min = 96000, + .rate_max = 96000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 96000 }, + .clock = 0x29 + } + }, + /* Capture */ + { + .ifnum = 2, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S32_LE, + .channels = 2, + .fmt_bits = 24, + .iface = 2, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x82, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC | + USB_ENDPOINT_USAGE_IMPLICIT_FB, + .rates = SNDRV_PCM_RATE_96000, + .rate_min = 96000, + .rate_max = 96000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 96000 }, + .clock = 0x29 + } + }, + { + .ifnum = -1 + } + } + } +}, { /* * Fiero SC-01 (firmware v1.1.0) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 51138350f03c58..968d90caeefa09 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1915,6 +1915,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), From c01793517d8dc8de22c719176ba3c02b774d0bcd Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 5 Jul 2022 10:21:02 -0700 Subject: [PATCH 0649/1056] nvme-pci: phison e16 has bogus namespace ids [ Upstream commit 73029c9b23cf1213e5f54c2b59efce08665199e7 ] Add the quirk. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216049 Reported-by: Chris Egolf Signed-off-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c3db9f12dac3ab..d820131d39b29c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3337,7 +3337,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_DISABLE_WRITE_ZEROES| NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ - .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN | + NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, From ec0c62a23c50887232d99a364fdd348466ef1e47 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 6 Jul 2022 12:20:59 -0700 Subject: [PATCH 0650/1056] signal handling: don't use BUG_ON() for debugging [ Upstream commit a382f8fee42ca10c9bfce0d2352d4153f931f5dc ] These are indeed "should not happen" situations, but it turns out recent changes made the 'task_is_stopped_or_trace()' case trigger (fix for that exists, is pending more testing), and the BUG_ON() makes it unnecessarily hard to actually debug for no good reason. It's been that way for a long time, but let's make it clear: BUG_ON() is not good for debugging, and should never be used in situations where you could just say "this shouldn't happen, but we can continue". Use WARN_ON_ONCE() instead to make sure it gets logged, and then just continue running. Instead of making the system basically unusuable because you crashed the machine while potentially holding some very core locks (eg this function is commonly called while holding 'tasklist_lock' for writing). Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/signal.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index d831f0aec56e65..c7dbb19219b9a5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2027,12 +2027,12 @@ bool do_notify_parent(struct task_struct *tsk, int sig) bool autoreap = false; u64 utime, stime; - BUG_ON(sig == -1); + WARN_ON_ONCE(sig == -1); - /* do_notify_parent_cldstop should have been called instead. */ - BUG_ON(task_is_stopped_or_traced(tsk)); + /* do_notify_parent_cldstop should have been called instead. */ + WARN_ON_ONCE(task_is_stopped_or_traced(tsk)); - BUG_ON(!tsk->ptrace && + WARN_ON_ONCE(!tsk->ptrace && (tsk->group_leader != tsk || !thread_group_empty(tsk))); /* Wake up all pidfd waiters */ From aa96257867c016e7de39be1b1bb8830c041b042b Mon Sep 17 00:00:00 2001 From: Lucien Buchmann Date: Sat, 25 Jun 2022 02:17:44 +0200 Subject: [PATCH 0651/1056] USB: serial: ftdi_sio: add Belimo device ids commit 7c239a071d1f04b7137789810807b4108d475c72 upstream. Those two product ids are known. Signed-off-by: Lucien Buchmann Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 3 +++ drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 7e852561d55ca8..e2a8c33f0cae95 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1023,6 +1023,9 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_DISPLAY_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_LITE_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ANALOG_PID) }, + /* Belimo Automation devices */ + { USB_DEVICE(FTDI_VID, BELIMO_ZTH_PID) }, + { USB_DEVICE(FTDI_VID, BELIMO_ZIP_PID) }, /* ICP DAS I-756xU devices */ { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index d1a9564697a4be..4e92c165c86bf0 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1568,6 +1568,12 @@ #define CHETCO_SEASMART_LITE_PID 0xA5AE /* SeaSmart Lite USB Adapter */ #define CHETCO_SEASMART_ANALOG_PID 0xA5AF /* SeaSmart Analog Adapter */ +/* + * Belimo Automation + */ +#define BELIMO_ZTH_PID 0x8050 +#define BELIMO_ZIP_PID 0xC811 + /* * Unjo AB */ From ea42ef3ef678a08906b4fe86c492d22063a0935f Mon Sep 17 00:00:00 2001 From: Linyu Yuan Date: Fri, 1 Jul 2022 16:08:54 +0800 Subject: [PATCH 0652/1056] usb: typec: add missing uevent when partner support PD commit 6fb9e1d94789e8ee5a258a23bc588693f743fd6c upstream. System like Android allow user control power role from UI, it is possible to implement application base on typec uevent to refresh UI, but found there is chance that UI show different state from typec attribute file. In typec_set_pwr_opmode(), when partner support PD, there is no uevent send to user space which cause the problem. Fix it by sending uevent notification when change power mode to PD. Fixes: bdecb33af34f ("usb: typec: API for controlling USB Type-C Multiplexers") Cc: stable@vger.kernel.org Signed-off-by: Linyu Yuan Link: https://lore.kernel.org/r/1656662934-10226-1-git-send-email-quic_linyyuan@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/class.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index aeef453aa65851..ff6c14d7b1a832 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1718,6 +1718,7 @@ void typec_set_pwr_opmode(struct typec_port *port, partner->usb_pd = 1; sysfs_notify(&partner_dev->kobj, NULL, "supports_usb_power_delivery"); + kobject_uevent(&partner_dev->kobj, KOBJ_CHANGE); } put_device(partner_dev); } From 3cb692555a0b40a8c13b6bb2807f4b53ab7fec74 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 27 Jun 2022 18:41:19 -0700 Subject: [PATCH 0653/1056] usb: dwc3: gadget: Fix event pending check commit 7441b273388b9a59d8387a03ffbbca9d5af6348c upstream. The DWC3_EVENT_PENDING flag is used to protect against invalid call to top-half interrupt handler, which can occur when there's a delay in software detection of the interrupt line deassertion. However, the clearing of this flag was done prior to unmasking the interrupt line, creating opportunity where the top-half handler can come. This breaks the serialization and creates a race between the top-half and bottom-half handler, resulting in losing synchronization between the controller and the driver when processing events. To fix this, make sure the clearing of the DWC3_EVENT_PENDING is done at the end of the bottom-half handler. Fixes: d325a1de49d6 ("usb: dwc3: gadget: Prevent losing events in event cache") Cc: stable@vger.kernel.org Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/8670aaf1cf52e7d1e6df2a827af2d77263b93b75.1656380429.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 99d0372ed840e7..e534b98205ca65 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4149,7 +4149,6 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) } evt->count = 0; - evt->flags &= ~DWC3_EVENT_PENDING; ret = IRQ_HANDLED; /* Unmask interrupt */ @@ -4162,6 +4161,9 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) dwc3_writel(dwc->regs, DWC3_DEV_IMOD(0), dwc->imod_interval); } + /* Keep the clearing of DWC3_EVENT_PENDING at the end */ + evt->flags &= ~DWC3_EVENT_PENDING; + return ret; } From 8b07c29987201228d94d16f769757d76867a001a Mon Sep 17 00:00:00 2001 From: Chanho Park Date: Mon, 27 Jun 2022 15:51:13 +0900 Subject: [PATCH 0654/1056] tty: serial: samsung_tty: set dma burst_size to 1 commit f7e35e4bf1e8dc2c8cbd5e0955dc1bd58558dae0 upstream. The src_maxburst and dst_maxburst have been changed to 1 but the settings of the UCON register aren't changed yet. They should be changed as well according to the dmaengine slave config. Fixes: aa2f80e752c7 ("serial: samsung: fix maxburst parameter for DMA transactions") Cc: stable Cc: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Chanho Park Link: https://lore.kernel.org/r/20220627065113.139520-1-chanho61.park@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung_tty.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index 319533b3c32a5e..f460b47ff6f2d6 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -378,8 +378,7 @@ static void enable_tx_dma(struct s3c24xx_uart_port *ourport) /* Enable tx dma mode */ ucon = rd_regl(port, S3C2410_UCON); ucon &= ~(S3C64XX_UCON_TXBURST_MASK | S3C64XX_UCON_TXMODE_MASK); - ucon |= (dma_get_cache_alignment() >= 16) ? - S3C64XX_UCON_TXBURST_16 : S3C64XX_UCON_TXBURST_1; + ucon |= S3C64XX_UCON_TXBURST_1; ucon |= S3C64XX_UCON_TXMODE_DMA; wr_regl(port, S3C2410_UCON, ucon); @@ -675,7 +674,7 @@ static void enable_rx_dma(struct s3c24xx_uart_port *ourport) S3C64XX_UCON_DMASUS_EN | S3C64XX_UCON_TIMEOUT_EN | S3C64XX_UCON_RXMODE_MASK); - ucon |= S3C64XX_UCON_RXBURST_16 | + ucon |= S3C64XX_UCON_RXBURST_1 | 0xf << S3C64XX_UCON_TIMEOUT_SHIFT | S3C64XX_UCON_EMPTYINT_EN | S3C64XX_UCON_TIMEOUT_EN | From 57964a5710252bc82fe22d9fa98c180c58c20244 Mon Sep 17 00:00:00 2001 From: Yangxi Xiang Date: Tue, 28 Jun 2022 17:33:22 +0800 Subject: [PATCH 0655/1056] vt: fix memory overlapping when deleting chars in the buffer commit 39cdb68c64d84e71a4a717000b6e5de208ee60cc upstream. A memory overlapping copy occurs when deleting a long line. This memory overlapping copy can cause data corruption when scr_memcpyw is optimized to memcpy because memcpy does not ensure its behavior if the destination buffer overlaps with the source buffer. The line buffer is not always broken, because the memcpy utilizes the hardware acceleration, whose result is not deterministic. Fix this problem by using replacing the scr_memcpyw with scr_memmovew. Fixes: 81732c3b2fed ("tty vt: Fix line garbage in virtual console on command line edition") Cc: stable Signed-off-by: Yangxi Xiang Link: https://lore.kernel.org/r/20220628093322.5688-1-xyangxi5@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 7359c3e80d63e2..55283a7f973fb9 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -855,7 +855,7 @@ static void delete_char(struct vc_data *vc, unsigned int nr) unsigned short *p = (unsigned short *) vc->vc_pos; vc_uniscr_delete(vc, nr); - scr_memcpyw(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2); + scr_memmovew(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2); scr_memsetw(p + vc->vc_cols - vc->state.x - nr, vc->vc_video_erase_char, nr * 2); vc->vc_need_wrap = 0; From 10b27fa2d67c297ef7936872bd96987e529cfbd2 Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Tue, 28 Jun 2022 16:35:15 +0800 Subject: [PATCH 0656/1056] serial: 8250: fix return error code in serial8250_request_std_resource() commit 6e690d54cfa802f939cefbd2fa2c91bd0b8bd1b6 upstream. If port->mapbase = NULL in serial8250_request_std_resource() , it need return a error code instead of 0. If uart_set_info() fail to request new regions by serial8250_request_std_resource() but the return value of serial8250_request_std_resource() is 0, The system incorrectly considers that the resource application is successful and does not attempt to restore the old setting. A null pointer reference is triggered when the port resource is later invoked. Signed-off-by: Yi Yang Cc: stable Link: https://lore.kernel.org/r/20220628083515.64138-1-yiyang13@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index df9731f737466e..4f66825abe675b 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2982,8 +2982,10 @@ static int serial8250_request_std_resource(struct uart_8250_port *up) case UPIO_MEM32BE: case UPIO_MEM16: case UPIO_MEM: - if (!port->mapbase) + if (!port->mapbase) { + ret = -EINVAL; break; + } if (!request_mem_region(port->mapbase, size, "serial")) { ret = -EBUSY; From 8ef116a2a7383a390be42fd8554dc1680cc0ea77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 27 Jun 2022 18:07:52 +0300 Subject: [PATCH 0657/1056] serial: stm32: Clear prev values before setting RTS delays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5c5f44e36217de5ead789ff25da71c31c2331c96 upstream. The code lacks clearing of previous DEAT/DEDT values. Thus, changing values on the fly results in garbage delays tending towards the maximum value as more and more bits are ORed together. (Leaving RS485 mode would have cleared the old values though). Fixes: 1bcda09d2910 ("serial: stm32: add support for RS485 hardware control mode") Cc: stable Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220627150753.34510-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 10e9f983de6217..fc166cc2c856d9 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -71,6 +71,8 @@ static void stm32_usart_config_reg_rs485(u32 *cr1, u32 *cr3, u32 delay_ADE, *cr3 |= USART_CR3_DEM; over8 = *cr1 & USART_CR1_OVER8; + *cr1 &= ~(USART_CR1_DEDT_MASK | USART_CR1_DEAT_MASK); + if (over8) rs485_deat_dedt = delay_ADE * baud * 8; else From 2db3b95166f72e6481a79b82b1d6f94f4b18fcc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 14 Jun 2022 10:56:37 +0300 Subject: [PATCH 0658/1056] serial: pl011: UPSTAT_AUTORTS requires .throttle/unthrottle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 211565b100993c90b53bf40851eacaefc830cfe0 upstream. The driver must provide throttle and unthrottle in uart_ops when it sets UPSTAT_AUTORTS. Add them using existing stop_rx & enable_interrupts functions. Fixes: 2a76fa283098 (serial: pl011: Adopt generic flag to store auto RTS status) Cc: stable Cc: Lukas Wunner Reported-by: Nuno Gonçalves Tested-by: Nuno Gonçalves Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220614075637.8558-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 0e908061b5d7d8..300a8bbb4b8071 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1372,6 +1372,15 @@ static void pl011_stop_rx(struct uart_port *port) pl011_dma_rx_stop(uap); } +static void pl011_throttle_rx(struct uart_port *port) +{ + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + pl011_stop_rx(port); + spin_unlock_irqrestore(&port->lock, flags); +} + static void pl011_enable_ms(struct uart_port *port) { struct uart_amba_port *uap = @@ -1793,9 +1802,10 @@ static int pl011_allocate_irq(struct uart_amba_port *uap) */ static void pl011_enable_interrupts(struct uart_amba_port *uap) { + unsigned long flags; unsigned int i; - spin_lock_irq(&uap->port.lock); + spin_lock_irqsave(&uap->port.lock, flags); /* Clear out any spuriously appearing RX interrupts */ pl011_write(UART011_RTIS | UART011_RXIS, uap, REG_ICR); @@ -1817,7 +1827,14 @@ static void pl011_enable_interrupts(struct uart_amba_port *uap) if (!pl011_dma_rx_running(uap)) uap->im |= UART011_RXIM; pl011_write(uap->im, uap, REG_IMSC); - spin_unlock_irq(&uap->port.lock); + spin_unlock_irqrestore(&uap->port.lock, flags); +} + +static void pl011_unthrottle_rx(struct uart_port *port) +{ + struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port); + + pl011_enable_interrupts(uap); } static int pl011_startup(struct uart_port *port) @@ -2245,6 +2262,8 @@ static const struct uart_ops amba_pl011_pops = { .stop_tx = pl011_stop_tx, .start_tx = pl011_start_tx, .stop_rx = pl011_stop_rx, + .throttle = pl011_throttle_rx, + .unthrottle = pl011_unthrottle_rx, .enable_ms = pl011_enable_ms, .break_ctl = pl011_break_ctl, .startup = pl011_startup, From 190ce5cdc55d1b66ea582ac2be6fd5a72e3cc486 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 29 Jun 2022 12:48:41 +0300 Subject: [PATCH 0659/1056] serial: 8250: Fix PM usage_count for console handover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f9b11229b79c0fb2100b5bb4628a101b1d37fbf6 upstream. When console is enabled, univ8250_console_setup() calls serial8250_console_setup() before .dev is set to uart_port. Therefore, it will not call pm_runtime_get_sync(). Later, when the actual driver is going to take over univ8250_console_exit() is called. As .dev is already set, serial8250_console_exit() makes pm_runtime_put_sync() call with usage count being zero triggering PM usage count warning (extra debug for univ8250_console_setup(), univ8250_console_exit(), and serial8250_register_ports()): [ 0.068987] univ8250_console_setup ttyS0 nodev [ 0.499670] printk: console [ttyS0] enabled [ 0.717955] printk: console [ttyS0] printing thread started [ 1.960163] serial8250_register_ports assigned dev for ttyS0 [ 1.976830] printk: console [ttyS0] disabled [ 1.976888] printk: console [ttyS0] printing thread stopped [ 1.977073] univ8250_console_exit ttyS0 usage:0 [ 1.977075] serial8250 serial8250: Runtime PM usage count underflow! [ 1.977429] dw-apb-uart.6: ttyS0 at MMIO 0x4010006000 (irq = 33, base_baud = 115200) is a 16550A [ 1.977812] univ8250_console_setup ttyS0 usage:2 [ 1.978167] printk: console [ttyS0] printing thread started [ 1.978203] printk: console [ttyS0] enabled To fix the issue, call pm_runtime_get_sync() in serial8250_register_ports() as soon as .dev is set for an uart_port if it has console enabled. This problem became apparent only recently because 82586a721595 ("PM: runtime: Avoid device usage count underflows") added the warning printout. I confirmed this problem also occurs with v5.18 (w/o the warning printout, obviously). Fixes: bedb404e91bb ("serial: 8250_port: Don't use power management for kernel console") Cc: stable Tested-by: Tony Lindgren Reviewed-by: Andy Shevchenko Reviewed-by: Tony Lindgren Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/b4f428e9-491f-daf2-2232-819928dc276e@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 4 ++++ drivers/tty/serial/serial_core.c | 5 ----- include/linux/serial_core.h | 5 +++++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 1ce193daea7f15..30b7890645ac0f 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -561,6 +562,9 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) up->port.dev = dev; + if (uart_console_enabled(&up->port)) + pm_runtime_get_sync(up->port.dev); + serial8250_apply_quirks(up); uart_add_one_port(drv, &up->port); } diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 4d3ad4c6c60f0a..82ddbb92d07da5 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1912,11 +1912,6 @@ static int uart_proc_show(struct seq_file *m, void *v) } #endif -static inline bool uart_console_enabled(struct uart_port *port) -{ - return uart_console(port) && (port->cons->flags & CON_ENABLED); -} - static void uart_port_spin_lock_init(struct uart_port *port) { spin_lock_init(&port->lock); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8c32935e1059d5..6d07b5f9e3b817 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -388,6 +388,11 @@ static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; static inline int setup_earlycon(char *buf) { return 0; } #endif +static inline bool uart_console_enabled(struct uart_port *port) +{ + return uart_console(port) && (port->cons->flags & CON_ENABLED); +} + struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, From eac7fd3ca397ef9538863bfc276b6e0aca92ab81 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 8 Jul 2022 15:14:56 +0200 Subject: [PATCH 0660/1056] x86/pat: Fix x86_has_pat_wp() commit 230ec83d4299b30c51a1c133b4f2a669972cc08a upstream. x86_has_pat_wp() is using a wrong test, as it relies on the normal PAT configuration used by the kernel. In case the PAT MSR has been setup by another entity (e.g. Xen hypervisor) it might return false even if the PAT configuration is allowing WP mappings. This due to the fact that when running as Xen PV guest the PAT MSR is setup by the hypervisor and cannot be changed by the guest. This results in the WP related entry to be at a different position when running as Xen PV guest compared to the bare metal or fully virtualized case. The correct way to test for WP support is: 1. Get the PTE protection bits needed to select WP mode by reading __cachemode2pte_tbl[_PAGE_CACHE_MODE_WP] (depending on the PAT MSR setting this might return protection bits for a stronger mode, e.g. UC-) 2. Translate those bits back into the real cache mode selected by those PTE bits by reading __pte2cachemode_tbl[__pte2cm_idx(prot)] 3. Test for the cache mode to be _PAGE_CACHE_MODE_WP Fixes: f88a68facd9a ("x86/mm: Extend early_memremap() support with additional attrs") Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Cc: # 4.14 Link: https://lore.kernel.org/r/20220503132207.17234-1-jgross@suse.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 23a14d82e7838e..0e3667e529abbe 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -78,10 +78,20 @@ static uint8_t __pte2cachemode_tbl[8] = { [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, }; -/* Check that the write-protect PAT entry is set for write-protect */ +/* + * Check that the write-protect PAT entry is set for write-protect. + * To do this without making assumptions how PAT has been set up (Xen has + * another layout than the kernel), translate the _PAGE_CACHE_MODE_WP cache + * mode via the __cachemode2pte_tbl[] into protection bits (those protection + * bits will select a cache mode of WP or better), and then translate the + * protection bits back into the cache mode using __pte2cm_idx() and the + * __pte2cachemode_tbl[] array. This will return the really used cache mode. + */ bool x86_has_pat_wp(void) { - return __pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] == _PAGE_CACHE_MODE_WP; + uint16_t prot = __cachemode2pte_tbl[_PAGE_CACHE_MODE_WP]; + + return __pte2cachemode_tbl[__pte2cm_idx(prot)] == _PAGE_CACHE_MODE_WP; } enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) From 31f351eb534e889d11cd149de547d99eb5a15c64 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 17 Jun 2022 14:10:27 +0200 Subject: [PATCH 0661/1056] drm/aperture: Run fbdev removal before internal helpers commit bf43e4521ff3223a613f3a496991a22a4d78e04b upstream. Always run fbdev removal first to remove simpledrm via sysfb_disable(). This clears the internal state. The later call to drm_aperture_detach_drivers() then does nothing. Otherwise, with drm_aperture_detach_drivers() running first, the call to sysfb_disable() uses inconsistent state. Example backtrace show below: [ 11.663422] ================================================================== [ 11.663426] BUG: KASAN: use-after-free in device_del+0x79/0x5f0 [ 11.663435] Read of size 8 at addr ffff888108185050 by task systemd-udevd/311 [ 11.663440] CPU: 0 PID: 311 Comm: systemd-udevd Tainted: G E 5 .19.0-rc2-1-default+ #1689 [ 11.663445] Hardware name: HP ProLiant DL120 G7, BIOS J01 04/21/2011 [ 11.663447] Call Trace: [ 11.663449] [ 11.663451] ? device_del+0x79/0x5f0 [ 11.663456] dump_stack_lvl+0x5b/0x73 [ 11.663462] print_address_description.constprop.0+0x1f/0x1b0 [ 11.663468] ? device_del+0x79/0x5f0 [ 11.663471] ? device_del+0x79/0x5f0 [ 11.663475] print_report.cold+0x3c/0x21c [ 11.663481] ? lock_acquired+0x87/0x1e0 [ 11.663484] ? lock_acquired+0x87/0x1e0 [ 11.663489] ? device_del+0x79/0x5f0 [ 11.663492] kasan_report+0xbf/0xf0 [ 11.663498] ? device_del+0x79/0x5f0 [ 11.663503] device_del+0x79/0x5f0 [ 11.663509] ? device_remove_attrs+0x170/0x170 [ 11.663514] ? lock_is_held_type+0xe8/0x140 [ 11.663523] platform_device_del.part.0+0x19/0xe0 [ 11.663530] platform_device_unregister+0x1c/0x30 [ 11.663535] sysfb_disable+0x2d/0x70 [ 11.663540] remove_conflicting_framebuffers+0x1c/0xf0 [ 11.663546] remove_conflicting_pci_framebuffers+0x130/0x1a0 [ 11.663554] drm_aperture_remove_conflicting_pci_framebuffers+0x86/0xb0 [ 11.663561] ? mgag200_pci_remove+0x30/0x30 [mgag200] [ 11.663578] mgag200_pci_probe+0x2d/0x140 [mgag200] Reported-by: Zack Rusin Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Reviewed-by: Zack Rusin Fixes: ee7a69aa38d8 ("fbdev: Disable sysfb device registration when removing conflicting FBs") Cc: Javier Martinez Canillas Cc: Daniel Vetter Cc: Daniel Vetter Cc: Sam Ravnborg Cc: Helge Deller Cc: Thomas Zimmermann Cc: Alex Deucher Cc: Zhen Lei Cc: Changcheng Deng Link: https://patchwork.freedesktop.org/patch/msgid/20220617121027.30273-1-tzimmermann@suse.de (cherry picked from commit fb84efa28a48e30b87fa1122e8aab8016c7347cd) Signed-off-by: Thomas Zimmermann Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_aperture.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/drm_aperture.c b/drivers/gpu/drm/drm_aperture.c index 74bd4a76b253cf..059fd71424f6b6 100644 --- a/drivers/gpu/drm/drm_aperture.c +++ b/drivers/gpu/drm/drm_aperture.c @@ -329,7 +329,20 @@ int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const struct drm_driver *req_driver) { resource_size_t base, size; - int bar, ret = 0; + int bar, ret; + + /* + * WARNING: Apparently we must kick fbdev drivers before vgacon, + * otherwise the vga fbdev driver falls over. + */ +#if IS_REACHABLE(CONFIG_FB) + ret = remove_conflicting_pci_framebuffers(pdev, req_driver->name); + if (ret) + return ret; +#endif + ret = vga_remove_vgacon(pdev); + if (ret) + return ret; for (bar = 0; bar < PCI_STD_NUM_BARS; ++bar) { if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) @@ -339,15 +352,6 @@ int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, drm_aperture_detach_drivers(base, size); } - /* - * WARNING: Apparently we must kick fbdev drivers before vgacon, - * otherwise the vga fbdev driver falls over. - */ -#if IS_REACHABLE(CONFIG_FB) - ret = remove_conflicting_pci_framebuffers(pdev, req_driver->name); -#endif - if (ret == 0) - ret = vga_remove_vgacon(pdev); - return ret; + return 0; } EXPORT_SYMBOL(drm_aperture_remove_conflicting_pci_framebuffers); From 760adb59f6211e157dd587927ac26c42abc81550 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 21 Jul 2022 21:24:44 +0200 Subject: [PATCH 0662/1056] Linux 5.15.56 Link: https://lore.kernel.org/r/20220719114656.750574879@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Guenter Roeck Tested-by: Ron Economos Tested-by: Jon Hunter Tested-by: Bagas Sanjaya Tested-by: Linux Kernel Functional Testing Tested-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c95f0f59885fdf..2ce44168b1b543 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 55 +SUBLEVEL = 56 EXTRAVERSION = NAME = Trick or Treat From 01bc8bd64c19a26940a8a68b7df1700cd6403778 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Apr 2022 22:10:48 +0800 Subject: [PATCH 0663/1056] x86/traps: Use pt_regs directly in fixup_bad_iret() commit 0aca53c6b522f8d6e2681ca875acbbe105f5fdcf upstream. Always stash the address error_entry() is going to return to, in %r12 and get rid of the void *error_entry_ret; slot in struct bad_iret_stack which was supposed to account for it and pt_regs pushed on the stack. After this, both fixup_bad_iret() and sync_regs() can work on a struct pt_regs pointer directly. [ bp: Rewrite commit message, touch ups. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220503032107.680190-2-jiangshanlai@gmail.com Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 5 ++++- arch/x86/include/asm/traps.h | 2 +- arch/x86/kernel/traps.c | 19 +++++++------------ 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3acf0af4930559..1b4796af97a05d 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1041,9 +1041,12 @@ SYM_CODE_START_LOCAL(error_entry) * Pretend that the exception came from user mode: set up pt_regs * as if we faulted immediately after IRET. */ - mov %rsp, %rdi + popq %r12 /* save return addr in %12 */ + movq %rsp, %rdi /* arg0 = pt_regs pointer */ call fixup_bad_iret mov %rax, %rsp + ENCODE_FRAME_POINTER + pushq %r12 jmp .Lerror_entry_from_usermode_after_swapgs SYM_CODE_END(error_entry) diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 6221be7cafc3b2..1cdd7e8bcba783 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -13,7 +13,7 @@ #ifdef CONFIG_X86_64 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); asmlinkage __visible notrace -struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s); +struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs); void __init trap_init(void); asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs); #endif diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 928e1ac820e67c..ca47080e37741f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -762,14 +762,10 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r } #endif -struct bad_iret_stack { - void *error_entry_ret; - struct pt_regs regs; -}; - -asmlinkage __visible noinstr -struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) +asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs) { + struct pt_regs tmp, *new_stack; + /* * This is called from entry_64.S early in handling a fault * caused by a bad iret to user mode. To handle the fault @@ -778,19 +774,18 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) * just below the IRET frame) and we want to pretend that the * exception came from the IRET target. */ - struct bad_iret_stack tmp, *new_stack = - (struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; + new_stack = (struct pt_regs *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; /* Copy the IRET target to the temporary storage. */ - __memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8); + __memcpy(&tmp.ip, (void *)bad_regs->sp, 5*8); /* Copy the remainder of the stack from the current stack. */ - __memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip)); + __memcpy(&tmp, bad_regs, offsetof(struct pt_regs, ip)); /* Update the entry stack */ __memcpy(new_stack, &tmp, sizeof(tmp)); - BUG_ON(!user_mode(&new_stack->regs)); + BUG_ON(!user_mode(new_stack)); return new_stack; } #endif From 19ac6c99a17e879d2f43f7650bf824f5fbfcfa0a Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Apr 2022 22:10:49 +0800 Subject: [PATCH 0664/1056] x86/entry: Switch the stack after error_entry() returns commit 520a7e80c96d655fbe4650d9cc985bd9d0443389 upstream. error_entry() calls fixup_bad_iret() before sync_regs() if it is a fault from a bad IRET, to copy pt_regs to the kernel stack. It switches to the kernel stack directly after sync_regs(). But error_entry() itself is also a function call, so it has to stash the address it is going to return to, in %r12 which is unnecessarily complicated. Move the stack switching after error_entry() and get rid of the need to handle the return address. [ bp: Massage commit message. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220503032107.680190-3-jiangshanlai@gmail.com Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 1b4796af97a05d..ce1b436416e019 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -323,6 +323,8 @@ SYM_CODE_END(ret_from_fork) .macro idtentry_body cfunc has_error_code:req call error_entry + movq %rax, %rsp /* switch to the task stack if from userspace */ + ENCODE_FRAME_POINTER UNWIND_HINT_REGS movq %rsp, %rdi /* pt_regs pointer into 1st argument*/ @@ -982,14 +984,10 @@ SYM_CODE_START_LOCAL(error_entry) /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ .Lerror_entry_from_usermode_after_swapgs: /* Put us onto the real thread stack. */ - popq %r12 /* save return addr in %12 */ - movq %rsp, %rdi /* arg0 = pt_regs pointer */ call sync_regs - movq %rax, %rsp /* switch stack */ - ENCODE_FRAME_POINTER - pushq %r12 RET /* @@ -1021,6 +1019,7 @@ SYM_CODE_START_LOCAL(error_entry) */ .Lerror_entry_done_lfence: FENCE_SWAPGS_KERNEL_ENTRY + leaq 8(%rsp), %rax /* return pt_regs pointer */ RET .Lbstep_iret: @@ -1041,12 +1040,9 @@ SYM_CODE_START_LOCAL(error_entry) * Pretend that the exception came from user mode: set up pt_regs * as if we faulted immediately after IRET. */ - popq %r12 /* save return addr in %12 */ - movq %rsp, %rdi /* arg0 = pt_regs pointer */ + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ call fixup_bad_iret - mov %rax, %rsp - ENCODE_FRAME_POINTER - pushq %r12 + mov %rax, %rdi jmp .Lerror_entry_from_usermode_after_swapgs SYM_CODE_END(error_entry) From cac414684be9adb6b8bb7e271b66d4ffd589769f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Apr 2022 22:10:50 +0800 Subject: [PATCH 0665/1056] x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry() commit ee774dac0da1543376a69fd90840af6aa86879b3 upstream. The macro idtentry() (through idtentry_body()) calls error_entry() unconditionally even on XENPV. But XENPV needs to only push and clear regs. PUSH_AND_CLEAR_REGS in error_entry() makes the stack not return to its original place when the function returns, which means it is not possible to convert it to a C function. Carve out PUSH_AND_CLEAR_REGS out of error_entry() and into a separate function and call it before error_entry() in order to avoid calling error_entry() on XENPV. It will also allow for error_entry() to be converted to C code that can use inlined sync_regs() and save a function call. [ bp: Massage commit message. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220503032107.680190-4-jiangshanlai@gmail.com Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ce1b436416e019..a2165b5759a11d 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -315,6 +315,14 @@ SYM_CODE_END(ret_from_fork) #endif .endm +/* Save all registers in pt_regs */ +SYM_CODE_START_LOCAL(push_and_clear_regs) + UNWIND_HINT_FUNC + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 + RET +SYM_CODE_END(push_and_clear_regs) + /** * idtentry_body - Macro to emit code calling the C function * @cfunc: C function to be called @@ -322,6 +330,9 @@ SYM_CODE_END(ret_from_fork) */ .macro idtentry_body cfunc has_error_code:req + call push_and_clear_regs + UNWIND_HINT_REGS + call error_entry movq %rax, %rsp /* switch to the task stack if from userspace */ ENCODE_FRAME_POINTER @@ -965,13 +976,11 @@ SYM_CODE_START_LOCAL(paranoid_exit) SYM_CODE_END(paranoid_exit) /* - * Save all registers in pt_regs, and switch GS if needed. + * Switch GS and CR3 if needed. */ SYM_CODE_START_LOCAL(error_entry) UNWIND_HINT_FUNC cld - PUSH_AND_CLEAR_REGS save_ret=1 - ENCODE_FRAME_POINTER 8 testb $3, CS+8(%rsp) jz .Lerror_kernelspace From 1fd333eb3377361dfeb3c6fed577393f0a7e2c74 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 3 May 2022 11:21:06 +0800 Subject: [PATCH 0666/1056] x86/entry: Don't call error_entry() for XENPV commit 64cbd0acb58203fb769ed2f4eab526d43e243847 upstream. XENPV guests enter already on the task stack and they can't fault for native_iret() nor native_load_gs_index() since they use their own pvop for IRET and load_gs_index(). A CR3 switch is not needed either. So there is no reason to call error_entry() in XENPV. [ bp: Massage commit message. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220503032107.680190-6-jiangshanlai@gmail.com Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a2165b5759a11d..6ff78841b975c0 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -333,8 +333,17 @@ SYM_CODE_END(push_and_clear_regs) call push_and_clear_regs UNWIND_HINT_REGS - call error_entry - movq %rax, %rsp /* switch to the task stack if from userspace */ + /* + * Call error_entry() and switch to the task stack if from userspace. + * + * When in XENPV, it is already in the task stack, and it can't fault + * for native_iret() nor native_load_gs_index() since XENPV uses its + * own pvops for IRET and load_gs_index(). And it doesn't need to + * switch the CR3. So it can skip invoking error_entry(). + */ + ALTERNATIVE "call error_entry; movq %rax, %rsp", \ + "", X86_FEATURE_XENPV + ENCODE_FRAME_POINTER UNWIND_HINT_REGS From 29e6b52efca13369e8ce35abeabbcb105554c0f0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:33 +0200 Subject: [PATCH 0667/1056] objtool: Classify symbols commit 1739c66eb7bd5f27f1b69a5a26e10e8327d1e136 upstream. In order to avoid calling str*cmp() on symbol names, over and over, do them all once upfront and store the result. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120309.658539311@infradead.org [cascardo: no pv_target on struct symbol, because of missing db2b0c5d7b6f19b3c2cab08c531b65342eb5252b] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 34 +++++++++++++++++++---------- tools/objtool/include/objtool/elf.h | 7 ++++-- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8b3435af989a27..236f34ea1be270 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -859,8 +859,7 @@ static void add_call_dest(struct objtool_file *file, struct instruction *insn, * so they need a little help, NOP out any KCOV calls from noinstr * text. */ - if (insn->sec->noinstr && - !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) { + if (insn->sec->noinstr && insn->call_dest->kcov) { if (reloc) { reloc->type = R_NONE; elf_write_reloc(file->elf, reloc); @@ -884,7 +883,7 @@ static void add_call_dest(struct objtool_file *file, struct instruction *insn, } } - if (mcount && !strcmp(insn->call_dest->name, "__fentry__")) { + if (mcount && insn->call_dest->fentry) { if (sibling) WARN_FUNC("Tail call to __fentry__ !?!?", insn->sec, insn->offset); @@ -934,7 +933,7 @@ static int add_jump_destinations(struct objtool_file *file) } else if (reloc->sym->type == STT_SECTION) { dest_sec = reloc->sym->sec; dest_off = arch_dest_reloc_offset(reloc->addend); - } else if (arch_is_retpoline(reloc->sym)) { + } else if (reloc->sym->retpoline_thunk) { /* * Retpoline jumps are really dynamic jumps in * disguise, so convert them accordingly. @@ -1075,7 +1074,7 @@ static int add_call_destinations(struct objtool_file *file) add_call_dest(file, insn, dest, false); - } else if (arch_is_retpoline(reloc->sym)) { + } else if (reloc->sym->retpoline_thunk) { /* * Retpoline calls are really dynamic calls in * disguise, so convert them accordingly. @@ -1759,17 +1758,28 @@ static int read_intra_function_calls(struct objtool_file *file) return 0; } -static int read_static_call_tramps(struct objtool_file *file) +static int classify_symbols(struct objtool_file *file) { struct section *sec; struct symbol *func; for_each_sec(file, sec) { list_for_each_entry(func, &sec->symbol_list, list) { - if (func->bind == STB_GLOBAL && - !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, + if (func->bind != STB_GLOBAL) + continue; + + if (!strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, strlen(STATIC_CALL_TRAMP_PREFIX_STR))) func->static_call_tramp = true; + + if (arch_is_retpoline(func)) + func->retpoline_thunk = true; + + if (!strcmp(func->name, "__fentry__")) + func->fentry = true; + + if (!strncmp(func->name, "__sanitizer_cov_", 16)) + func->kcov = true; } } @@ -1831,7 +1841,7 @@ static int decode_sections(struct objtool_file *file) /* * Must be before add_{jump_call}_destination. */ - ret = read_static_call_tramps(file); + ret = classify_symbols(file); if (ret) return ret; @@ -1889,9 +1899,9 @@ static int decode_sections(struct objtool_file *file) static bool is_fentry_call(struct instruction *insn) { - if (insn->type == INSN_CALL && insn->call_dest && - insn->call_dest->type == STT_NOTYPE && - !strcmp(insn->call_dest->name, "__fentry__")) + if (insn->type == INSN_CALL && + insn->call_dest && + insn->call_dest->fentry) return true; return false; diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 4443652856187a..d3915f5e0c736b 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -54,8 +54,11 @@ struct symbol { unsigned long offset; unsigned int len; struct symbol *pfunc, *cfunc, *alias; - bool uaccess_safe; - bool static_call_tramp; + u8 uaccess_safe : 1; + u8 static_call_tramp : 1; + u8 retpoline_thunk : 1; + u8 fentry : 1; + u8 kcov : 1; }; struct reloc { From c9fd00d8e622a681cf0e39b0844e4da85364d6c4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:34 +0200 Subject: [PATCH 0668/1056] objtool: Explicitly avoid self modifying code in .altinstr_replacement commit dd003edeffa3cb87bc9862582004f405d77d7670 upstream. Assume ALTERNATIVE()s know what they're doing and do not change, or cause to change, instructions in .altinstr_replacement sections. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120309.722511775@infradead.org [cascardo: context adjustment] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 236f34ea1be270..dcceffe8975194 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -840,18 +840,27 @@ static void remove_insn_ops(struct instruction *insn) } } -static void add_call_dest(struct objtool_file *file, struct instruction *insn, - struct symbol *dest, bool sibling) +static void annotate_call_site(struct objtool_file *file, + struct instruction *insn, bool sibling) { struct reloc *reloc = insn_reloc(file, insn); + struct symbol *sym = insn->call_dest; - insn->call_dest = dest; - if (!dest) + if (!sym) + sym = reloc->sym; + + /* + * Alternative replacement code is just template code which is + * sometimes copied to the original instruction. For now, don't + * annotate it. (In the future we might consider annotating the + * original instruction if/when it ever makes sense to do so.) + */ + if (!strcmp(insn->sec->name, ".altinstr_replacement")) return; - if (insn->call_dest->static_call_tramp) { - list_add_tail(&insn->call_node, - &file->static_call_list); + if (sym->static_call_tramp) { + list_add_tail(&insn->call_node, &file->static_call_list); + return; } /* @@ -859,7 +868,7 @@ static void add_call_dest(struct objtool_file *file, struct instruction *insn, * so they need a little help, NOP out any KCOV calls from noinstr * text. */ - if (insn->sec->noinstr && insn->call_dest->kcov) { + if (insn->sec->noinstr && sym->kcov) { if (reloc) { reloc->type = R_NONE; elf_write_reloc(file->elf, reloc); @@ -881,9 +890,11 @@ static void add_call_dest(struct objtool_file *file, struct instruction *insn, */ insn->retpoline_safe = true; } + + return; } - if (mcount && insn->call_dest->fentry) { + if (mcount && sym->fentry) { if (sibling) WARN_FUNC("Tail call to __fentry__ !?!?", insn->sec, insn->offset); @@ -898,9 +909,17 @@ static void add_call_dest(struct objtool_file *file, struct instruction *insn, insn->type = INSN_NOP; - list_add_tail(&insn->mcount_loc_node, - &file->mcount_loc_list); + list_add_tail(&insn->mcount_loc_node, &file->mcount_loc_list); + return; } +} + +static void add_call_dest(struct objtool_file *file, struct instruction *insn, + struct symbol *dest, bool sibling) +{ + insn->call_dest = dest; + if (!dest) + return; /* * Whatever stack impact regular CALLs have, should be undone @@ -910,6 +929,8 @@ static void add_call_dest(struct objtool_file *file, struct instruction *insn, * are converted to JUMP, see read_intra_function_calls(). */ remove_insn_ops(insn); + + annotate_call_site(file, insn, sibling); } /* From cad0e43a8c302f12dc2c1c2f78b2303b942ceb37 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:35 +0200 Subject: [PATCH 0669/1056] objtool: Shrink struct instruction commit c509331b41b7365e17396c246e8c5797bccc8074 upstream. Any one instruction can only ever call a single function, therefore insn->mcount_loc_node is superfluous and can use insn->call_node. This shrinks struct instruction, which is by far the most numerous structure objtool creates. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120309.785456706@infradead.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 6 +++--- tools/objtool/include/objtool/check.h | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index dcceffe8975194..49291f88b0876e 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -551,7 +551,7 @@ static int create_mcount_loc_sections(struct objtool_file *file) return 0; idx = 0; - list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) + list_for_each_entry(insn, &file->mcount_loc_list, call_node) idx++; sec = elf_create_section(file->elf, "__mcount_loc", 0, sizeof(unsigned long), idx); @@ -559,7 +559,7 @@ static int create_mcount_loc_sections(struct objtool_file *file) return -1; idx = 0; - list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) { + list_for_each_entry(insn, &file->mcount_loc_list, call_node) { loc = (unsigned long *)sec->data->d_buf + idx; memset(loc, 0, sizeof(unsigned long)); @@ -909,7 +909,7 @@ static void annotate_call_site(struct objtool_file *file, insn->type = INSN_NOP; - list_add_tail(&insn->mcount_loc_node, &file->mcount_loc_list); + list_add_tail(&insn->call_node, &file->mcount_loc_list); return; } } diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index 56d50bc50c10c5..fe8cf60ea69fce 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -40,7 +40,6 @@ struct instruction { struct list_head list; struct hlist_node hash; struct list_head call_node; - struct list_head mcount_loc_node; struct section *sec; unsigned long offset; unsigned int len; From 503882b5aeb694dfa7cb0a3fda174fbdfb6af059 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:36 +0200 Subject: [PATCH 0670/1056] objtool,x86: Replace alternatives with .retpoline_sites commit 134ab5bd1883312d7a4b3033b05c6b5a1bb8889b upstream. Instead of writing complete alternatives, simply provide a list of all the retpoline thunk calls. Then the kernel is free to do with them as it pleases. Simpler code all-round. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120309.850007165@infradead.org [cascardo: fixed conflict because of missing 8b946cc38e063f0f7bb67789478c38f6d7d457c9] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/vmlinux.lds.S | 14 +++ tools/objtool/arch/x86/decode.c | 120 ------------------------- tools/objtool/check.c | 132 ++++++++++++++++++++-------- tools/objtool/elf.c | 84 ------------------ tools/objtool/include/objtool/elf.h | 1 - tools/objtool/special.c | 8 -- 6 files changed, 107 insertions(+), 252 deletions(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index efd9e9ea17f255..3d6dc12d198f7c 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -272,6 +272,20 @@ SECTIONS __parainstructions_end = .; } +#ifdef CONFIG_RETPOLINE + /* + * List of instructions that call/jmp/jcc to retpoline thunks + * __x86_indirect_thunk_*(). These instructions can be patched along + * with alternatives, after which the section can be freed. + */ + . = ALIGN(8); + .retpoline_sites : AT(ADDR(.retpoline_sites) - LOAD_OFFSET) { + __retpoline_sites = .; + *(.retpoline_sites) + __retpoline_sites_end = .; + } +#endif + /* * struct alt_inst entries. From the header (alternative.h): * "Alternative instructions for different CPU types or capabilities" diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 63ffbc36dacc2d..3861078fff8c42 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -684,126 +684,6 @@ const char *arch_ret_insn(int len) return ret[len-1]; } -/* asm/alternative.h ? */ - -#define ALTINSTR_FLAG_INV (1 << 15) -#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV) - -struct alt_instr { - s32 instr_offset; /* original instruction */ - s32 repl_offset; /* offset to replacement instruction */ - u16 cpuid; /* cpuid bit set for replacement */ - u8 instrlen; /* length of original instruction */ - u8 replacementlen; /* length of new instruction */ -} __packed; - -static int elf_add_alternative(struct elf *elf, - struct instruction *orig, struct symbol *sym, - int cpuid, u8 orig_len, u8 repl_len) -{ - const int size = sizeof(struct alt_instr); - struct alt_instr *alt; - struct section *sec; - Elf_Scn *s; - - sec = find_section_by_name(elf, ".altinstructions"); - if (!sec) { - sec = elf_create_section(elf, ".altinstructions", - SHF_ALLOC, 0, 0); - - if (!sec) { - WARN_ELF("elf_create_section"); - return -1; - } - } - - s = elf_getscn(elf->elf, sec->idx); - if (!s) { - WARN_ELF("elf_getscn"); - return -1; - } - - sec->data = elf_newdata(s); - if (!sec->data) { - WARN_ELF("elf_newdata"); - return -1; - } - - sec->data->d_size = size; - sec->data->d_align = 1; - - alt = sec->data->d_buf = malloc(size); - if (!sec->data->d_buf) { - perror("malloc"); - return -1; - } - memset(sec->data->d_buf, 0, size); - - if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size, - R_X86_64_PC32, orig->sec, orig->offset)) { - WARN("elf_create_reloc: alt_instr::instr_offset"); - return -1; - } - - if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4, - R_X86_64_PC32, sym, 0)) { - WARN("elf_create_reloc: alt_instr::repl_offset"); - return -1; - } - - alt->cpuid = bswap_if_needed(cpuid); - alt->instrlen = orig_len; - alt->replacementlen = repl_len; - - sec->sh.sh_size += size; - sec->changed = true; - - return 0; -} - -#define X86_FEATURE_RETPOLINE ( 7*32+12) - -int arch_rewrite_retpolines(struct objtool_file *file) -{ - struct instruction *insn; - struct reloc *reloc; - struct symbol *sym; - char name[32] = ""; - - list_for_each_entry(insn, &file->retpoline_call_list, call_node) { - - if (insn->type != INSN_JUMP_DYNAMIC && - insn->type != INSN_CALL_DYNAMIC) - continue; - - if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk")) - continue; - - reloc = insn->reloc; - - sprintf(name, "__x86_indirect_alt_%s_%s", - insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call", - reloc->sym->name + 21); - - sym = find_symbol_by_name(file->elf, name); - if (!sym) { - sym = elf_create_undef_symbol(file->elf, name); - if (!sym) { - WARN("elf_create_undef_symbol"); - return -1; - } - } - - if (elf_add_alternative(file->elf, insn, sym, - ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) { - WARN("elf_add_alternative"); - return -1; - } - } - - return 0; -} - int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) { struct cfi_reg *cfa = &insn->cfi.cfa; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 49291f88b0876e..624e136fc22cfc 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -533,6 +533,52 @@ static int create_static_call_sections(struct objtool_file *file) return 0; } +static int create_retpoline_sites_sections(struct objtool_file *file) +{ + struct instruction *insn; + struct section *sec; + int idx; + + sec = find_section_by_name(file->elf, ".retpoline_sites"); + if (sec) { + WARN("file already has .retpoline_sites, skipping"); + return 0; + } + + idx = 0; + list_for_each_entry(insn, &file->retpoline_call_list, call_node) + idx++; + + if (!idx) + return 0; + + sec = elf_create_section(file->elf, ".retpoline_sites", 0, + sizeof(int), idx); + if (!sec) { + WARN("elf_create_section: .retpoline_sites"); + return -1; + } + + idx = 0; + list_for_each_entry(insn, &file->retpoline_call_list, call_node) { + + int *site = (int *)sec->data->d_buf + idx; + *site = 0; + + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(int), + R_X86_64_PC32, + insn->sec, insn->offset)) { + WARN("elf_add_reloc_to_insn: .retpoline_sites"); + return -1; + } + + idx++; + } + + return 0; +} + static int create_mcount_loc_sections(struct objtool_file *file) { struct section *sec; @@ -863,6 +909,11 @@ static void annotate_call_site(struct objtool_file *file, return; } + if (sym->retpoline_thunk) { + list_add_tail(&insn->call_node, &file->retpoline_call_list); + return; + } + /* * Many compilers cannot disable KCOV with a function attribute * so they need a little help, NOP out any KCOV calls from noinstr @@ -933,6 +984,39 @@ static void add_call_dest(struct objtool_file *file, struct instruction *insn, annotate_call_site(file, insn, sibling); } +static void add_retpoline_call(struct objtool_file *file, struct instruction *insn) +{ + /* + * Retpoline calls/jumps are really dynamic calls/jumps in disguise, + * so convert them accordingly. + */ + switch (insn->type) { + case INSN_CALL: + insn->type = INSN_CALL_DYNAMIC; + break; + case INSN_JUMP_UNCONDITIONAL: + insn->type = INSN_JUMP_DYNAMIC; + break; + case INSN_JUMP_CONDITIONAL: + insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; + break; + default: + return; + } + + insn->retpoline_safe = true; + + /* + * Whatever stack impact regular CALLs have, should be undone + * by the RETURN of the called function. + * + * Annotated intra-function calls retain the stack_ops but + * are converted to JUMP, see read_intra_function_calls(). + */ + remove_insn_ops(insn); + + annotate_call_site(file, insn, false); +} /* * Find the destination instructions for all jumps. */ @@ -955,19 +1039,7 @@ static int add_jump_destinations(struct objtool_file *file) dest_sec = reloc->sym->sec; dest_off = arch_dest_reloc_offset(reloc->addend); } else if (reloc->sym->retpoline_thunk) { - /* - * Retpoline jumps are really dynamic jumps in - * disguise, so convert them accordingly. - */ - if (insn->type == INSN_JUMP_UNCONDITIONAL) - insn->type = INSN_JUMP_DYNAMIC; - else - insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; - - list_add_tail(&insn->call_node, - &file->retpoline_call_list); - - insn->retpoline_safe = true; + add_retpoline_call(file, insn); continue; } else if (insn->func) { /* internal or external sibling call (with reloc) */ @@ -1096,18 +1168,7 @@ static int add_call_destinations(struct objtool_file *file) add_call_dest(file, insn, dest, false); } else if (reloc->sym->retpoline_thunk) { - /* - * Retpoline calls are really dynamic calls in - * disguise, so convert them accordingly. - */ - insn->type = INSN_CALL_DYNAMIC; - insn->retpoline_safe = true; - - list_add_tail(&insn->call_node, - &file->retpoline_call_list); - - remove_insn_ops(insn); - continue; + add_retpoline_call(file, insn); } else add_call_dest(file, insn, reloc->sym, false); @@ -1833,11 +1894,6 @@ static void mark_rodata(struct objtool_file *file) file->rodata = found; } -__weak int arch_rewrite_retpolines(struct objtool_file *file) -{ - return 0; -} - static int decode_sections(struct objtool_file *file) { int ret; @@ -1906,15 +1962,6 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - /* - * Must be after add_special_section_alts(), since this will emit - * alternatives. Must be after add_{jump,call}_destination(), since - * those create the call insn lists. - */ - ret = arch_rewrite_retpolines(file); - if (ret) - return ret; - return 0; } @@ -3252,6 +3299,13 @@ int check(struct objtool_file *file) goto out; warnings += ret; + if (retpoline) { + ret = create_retpoline_sites_sections(file); + if (ret < 0) + goto out; + warnings += ret; + } + if (mcount) { ret = create_mcount_loc_sections(file); if (ret < 0) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 2554f48c4ede59..bc3005ef5af8fe 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -944,90 +944,6 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str) return len; } -struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) -{ - struct section *symtab, *symtab_shndx; - struct symbol *sym; - Elf_Data *data; - Elf_Scn *s; - - sym = malloc(sizeof(*sym)); - if (!sym) { - perror("malloc"); - return NULL; - } - memset(sym, 0, sizeof(*sym)); - - sym->name = strdup(name); - - sym->sym.st_name = elf_add_string(elf, NULL, sym->name); - if (sym->sym.st_name == -1) - return NULL; - - sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE); - // st_other 0 - // st_shndx 0 - // st_value 0 - // st_size 0 - - symtab = find_section_by_name(elf, ".symtab"); - if (!symtab) { - WARN("can't find .symtab"); - return NULL; - } - - s = elf_getscn(elf->elf, symtab->idx); - if (!s) { - WARN_ELF("elf_getscn"); - return NULL; - } - - data = elf_newdata(s); - if (!data) { - WARN_ELF("elf_newdata"); - return NULL; - } - - data->d_buf = &sym->sym; - data->d_size = sizeof(sym->sym); - data->d_align = 1; - data->d_type = ELF_T_SYM; - - sym->idx = symtab->sh.sh_size / sizeof(sym->sym); - - symtab->sh.sh_size += data->d_size; - symtab->changed = true; - - symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); - if (symtab_shndx) { - s = elf_getscn(elf->elf, symtab_shndx->idx); - if (!s) { - WARN_ELF("elf_getscn"); - return NULL; - } - - data = elf_newdata(s); - if (!data) { - WARN_ELF("elf_newdata"); - return NULL; - } - - data->d_buf = &sym->sym.st_size; /* conveniently 0 */ - data->d_size = sizeof(Elf32_Word); - data->d_align = 4; - data->d_type = ELF_T_WORD; - - symtab_shndx->sh.sh_size += 4; - symtab_shndx->changed = true; - } - - sym->sec = find_section_by_index(elf, 0); - - elf_add_symbol(elf, sym); - - return sym; -} - struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr) { diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index d3915f5e0c736b..08821307ac99b7 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -143,7 +143,6 @@ int elf_write_insn(struct elf *elf, struct section *sec, unsigned long offset, unsigned int len, const char *insn); int elf_write_reloc(struct elf *elf, struct reloc *reloc); -struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name); int elf_write(struct elf *elf); void elf_close(struct elf *elf); diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 06c3eacab3d534..e2223dd91c379a 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -109,14 +109,6 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, return -1; } - /* - * Skip retpoline .altinstr_replacement... we already rewrite the - * instructions for retpolines anyway, see arch_is_retpoline() - * usage in add_{call,jump}_destinations(). - */ - if (arch_is_retpoline(new_reloc->sym)) - return 1; - reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off); /* _ASM_EXTABLE_EX hack */ From 655d4097039c959910dbf671f1936237b2b801e7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Jun 2021 11:41:01 +0200 Subject: [PATCH 0671/1056] objtool: Introduce CFI hash commit 8b946cc38e063f0f7bb67789478c38f6d7d457c9 upstream. Andi reported that objtool on vmlinux.o consumes more memory than his system has, leading to horrific performance. This is in part because we keep a struct instruction for every instruction in the file in-memory. Shrink struct instruction by removing the CFI state (which includes full register state) from it and demand allocating it. Given most instructions don't actually change CFI state, there's lots of repetition there, so add a hash table to find previous CFI instances. Reduces memory consumption (and runtime) for processing an x86_64-allyesconfig: pre: 4:40.84 real, 143.99 user, 44.18 sys, 30624988 mem post: 2:14.61 real, 108.58 user, 25.04 sys, 16396184 mem Suggested-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210624095147.756759107@infradead.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- tools/objtool/arch/x86/decode.c | 20 ++-- tools/objtool/check.c | 154 +++++++++++++++++++++++--- tools/objtool/include/objtool/arch.h | 2 +- tools/objtool/include/objtool/cfi.h | 2 + tools/objtool/include/objtool/check.h | 2 +- tools/objtool/orc_gen.c | 15 ++- 6 files changed, 160 insertions(+), 35 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 3861078fff8c42..3d8facc65d813a 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -684,34 +684,32 @@ const char *arch_ret_insn(int len) return ret[len-1]; } -int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) +int arch_decode_hint_reg(u8 sp_reg, int *base) { - struct cfi_reg *cfa = &insn->cfi.cfa; - switch (sp_reg) { case ORC_REG_UNDEFINED: - cfa->base = CFI_UNDEFINED; + *base = CFI_UNDEFINED; break; case ORC_REG_SP: - cfa->base = CFI_SP; + *base = CFI_SP; break; case ORC_REG_BP: - cfa->base = CFI_BP; + *base = CFI_BP; break; case ORC_REG_SP_INDIRECT: - cfa->base = CFI_SP_INDIRECT; + *base = CFI_SP_INDIRECT; break; case ORC_REG_R10: - cfa->base = CFI_R10; + *base = CFI_R10; break; case ORC_REG_R13: - cfa->base = CFI_R13; + *base = CFI_R13; break; case ORC_REG_DI: - cfa->base = CFI_DI; + *base = CFI_DI; break; case ORC_REG_DX: - cfa->base = CFI_DX; + *base = CFI_DX; break; default: return -1; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 624e136fc22cfc..a8320f0df786bd 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -27,7 +28,11 @@ struct alternative { bool skip_orig; }; -struct cfi_init_state initial_func_cfi; +static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache; + +static struct cfi_init_state initial_func_cfi; +static struct cfi_state init_cfi; +static struct cfi_state func_cfi; struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset) @@ -267,6 +272,78 @@ static void init_insn_state(struct insn_state *state, struct section *sec) state->noinstr = sec->noinstr; } +static struct cfi_state *cfi_alloc(void) +{ + struct cfi_state *cfi = calloc(sizeof(struct cfi_state), 1); + if (!cfi) { + WARN("calloc failed"); + exit(1); + } + nr_cfi++; + return cfi; +} + +static int cfi_bits; +static struct hlist_head *cfi_hash; + +static inline bool cficmp(struct cfi_state *cfi1, struct cfi_state *cfi2) +{ + return memcmp((void *)cfi1 + sizeof(cfi1->hash), + (void *)cfi2 + sizeof(cfi2->hash), + sizeof(struct cfi_state) - sizeof(struct hlist_node)); +} + +static inline u32 cfi_key(struct cfi_state *cfi) +{ + return jhash((void *)cfi + sizeof(cfi->hash), + sizeof(*cfi) - sizeof(cfi->hash), 0); +} + +static struct cfi_state *cfi_hash_find_or_add(struct cfi_state *cfi) +{ + struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)]; + struct cfi_state *obj; + + hlist_for_each_entry(obj, head, hash) { + if (!cficmp(cfi, obj)) { + nr_cfi_cache++; + return obj; + } + } + + obj = cfi_alloc(); + *obj = *cfi; + hlist_add_head(&obj->hash, head); + + return obj; +} + +static void cfi_hash_add(struct cfi_state *cfi) +{ + struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)]; + + hlist_add_head(&cfi->hash, head); +} + +static void *cfi_hash_alloc(unsigned long size) +{ + cfi_bits = max(10, ilog2(size)); + cfi_hash = mmap(NULL, sizeof(struct hlist_head) << cfi_bits, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, 0); + if (cfi_hash == (void *)-1L) { + WARN("mmap fail cfi_hash"); + cfi_hash = NULL; + } else if (stats) { + printf("cfi_bits: %d\n", cfi_bits); + } + + return cfi_hash; +} + +static unsigned long nr_insns; +static unsigned long nr_insns_visited; + /* * Call the arch-specific instruction decoder for all the instructions and add * them to the global instruction list. @@ -277,7 +354,6 @@ static int decode_instructions(struct objtool_file *file) struct symbol *func; unsigned long offset; struct instruction *insn; - unsigned long nr_insns = 0; int ret; for_each_sec(file, sec) { @@ -303,7 +379,6 @@ static int decode_instructions(struct objtool_file *file) memset(insn, 0, sizeof(*insn)); INIT_LIST_HEAD(&insn->alts); INIT_LIST_HEAD(&insn->stack_ops); - init_cfi_state(&insn->cfi); insn->sec = sec; insn->offset = offset; @@ -1239,7 +1314,6 @@ static int handle_group_alt(struct objtool_file *file, memset(nop, 0, sizeof(*nop)); INIT_LIST_HEAD(&nop->alts); INIT_LIST_HEAD(&nop->stack_ops); - init_cfi_state(&nop->cfi); nop->sec = special_alt->new_sec; nop->offset = special_alt->new_off + special_alt->new_len; @@ -1648,10 +1722,11 @@ static void set_func_state(struct cfi_state *state) static int read_unwind_hints(struct objtool_file *file) { + struct cfi_state cfi = init_cfi; struct section *sec, *relocsec; - struct reloc *reloc; struct unwind_hint *hint; struct instruction *insn; + struct reloc *reloc; int i; sec = find_section_by_name(file->elf, ".discard.unwind_hints"); @@ -1689,19 +1764,24 @@ static int read_unwind_hints(struct objtool_file *file) insn->hint = true; if (hint->type == UNWIND_HINT_TYPE_FUNC) { - set_func_state(&insn->cfi); + insn->cfi = &func_cfi; continue; } - if (arch_decode_hint_reg(insn, hint->sp_reg)) { + if (insn->cfi) + cfi = *(insn->cfi); + + if (arch_decode_hint_reg(hint->sp_reg, &cfi.cfa.base)) { WARN_FUNC("unsupported unwind_hint sp base reg %d", insn->sec, insn->offset, hint->sp_reg); return -1; } - insn->cfi.cfa.offset = bswap_if_needed(hint->sp_offset); - insn->cfi.type = hint->type; - insn->cfi.end = hint->end; + cfi.cfa.offset = bswap_if_needed(hint->sp_offset); + cfi.type = hint->type; + cfi.end = hint->end; + + insn->cfi = cfi_hash_find_or_add(&cfi); } return 0; @@ -2552,13 +2632,18 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn if (!insn->alt_group) return 0; + if (!insn->cfi) { + WARN("CFI missing"); + return -1; + } + alt_cfi = insn->alt_group->cfi; group_off = insn->offset - insn->alt_group->first_insn->offset; if (!alt_cfi[group_off]) { - alt_cfi[group_off] = &insn->cfi; + alt_cfi[group_off] = insn->cfi; } else { - if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) { + if (cficmp(alt_cfi[group_off], insn->cfi)) { WARN_FUNC("stack layout conflict in alternatives", insn->sec, insn->offset); return -1; @@ -2609,9 +2694,14 @@ static int handle_insn_ops(struct instruction *insn, static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2) { - struct cfi_state *cfi1 = &insn->cfi; + struct cfi_state *cfi1 = insn->cfi; int i; + if (!cfi1) { + WARN("CFI missing"); + return false; + } + if (memcmp(&cfi1->cfa, &cfi2->cfa, sizeof(cfi1->cfa))) { WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d", @@ -2796,7 +2886,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, struct instruction *insn, struct insn_state state) { struct alternative *alt; - struct instruction *next_insn; + struct instruction *next_insn, *prev_insn = NULL; struct section *sec; u8 visited; int ret; @@ -2825,15 +2915,25 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (insn->visited & visited) return 0; + } else { + nr_insns_visited++; } if (state.noinstr) state.instr += insn->instr; - if (insn->hint) - state.cfi = insn->cfi; - else - insn->cfi = state.cfi; + if (insn->hint) { + state.cfi = *insn->cfi; + } else { + /* XXX track if we actually changed state.cfi */ + + if (prev_insn && !cficmp(prev_insn->cfi, &state.cfi)) { + insn->cfi = prev_insn->cfi; + nr_cfi_reused++; + } else { + insn->cfi = cfi_hash_find_or_add(&state.cfi); + } + } insn->visited |= visited; @@ -2997,6 +3097,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 1; } + prev_insn = insn; insn = next_insn; } @@ -3252,10 +3353,20 @@ int check(struct objtool_file *file) int ret, warnings = 0; arch_initial_func_cfi_state(&initial_func_cfi); + init_cfi_state(&init_cfi); + init_cfi_state(&func_cfi); + set_func_state(&func_cfi); + + if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3))) + goto out; + + cfi_hash_add(&init_cfi); + cfi_hash_add(&func_cfi); ret = decode_sections(file); if (ret < 0) goto out; + warnings += ret; if (list_empty(&file->insn_list)) @@ -3313,6 +3424,13 @@ int check(struct objtool_file *file) warnings += ret; } + if (stats) { + printf("nr_insns_visited: %ld\n", nr_insns_visited); + printf("nr_cfi: %ld\n", nr_cfi); + printf("nr_cfi_reused: %ld\n", nr_cfi_reused); + printf("nr_cfi_cache: %ld\n", nr_cfi_cache); + } + out: /* * For now, don't fail the kernel build on fatal warnings. These diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 9ca08d95e78eea..deae99b423f358 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -85,7 +85,7 @@ unsigned long arch_dest_reloc_offset(int addend); const char *arch_nop_insn(int len); const char *arch_ret_insn(int len); -int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg); +int arch_decode_hint_reg(u8 sp_reg, int *base); bool arch_is_retpoline(struct symbol *sym); diff --git a/tools/objtool/include/objtool/cfi.h b/tools/objtool/include/objtool/cfi.h index fd5cb0bed9bf02..f11d1ac1dadf1b 100644 --- a/tools/objtool/include/objtool/cfi.h +++ b/tools/objtool/include/objtool/cfi.h @@ -7,6 +7,7 @@ #define _OBJTOOL_CFI_H #include +#include #define CFI_UNDEFINED -1 #define CFI_CFA -2 @@ -24,6 +25,7 @@ struct cfi_init_state { }; struct cfi_state { + struct hlist_node hash; /* must be first, cficmp() */ struct cfi_reg regs[CFI_NUM_REGS]; struct cfi_reg vals[CFI_NUM_REGS]; struct cfi_reg cfa; diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index fe8cf60ea69fce..6cfff078897f5c 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -59,7 +59,7 @@ struct instruction { struct list_head alts; struct symbol *func; struct list_head stack_ops; - struct cfi_state cfi; + struct cfi_state *cfi; }; static inline bool is_static_jump(struct instruction *insn) diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index b5865e2450cbb5..dd3c64af9db237 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -13,13 +13,19 @@ #include #include -static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi) +static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, + struct instruction *insn) { - struct instruction *insn = container_of(cfi, struct instruction, cfi); struct cfi_reg *bp = &cfi->regs[CFI_BP]; memset(orc, 0, sizeof(*orc)); + if (!cfi) { + orc->end = 0; + orc->sp_reg = ORC_REG_UNDEFINED; + return 0; + } + orc->end = cfi->end; if (cfi->cfa.base == CFI_UNDEFINED) { @@ -162,7 +168,7 @@ int orc_create(struct objtool_file *file) int i; if (!alt_group) { - if (init_orc_entry(&orc, &insn->cfi)) + if (init_orc_entry(&orc, insn->cfi, insn)) return -1; if (!memcmp(&prev_orc, &orc, sizeof(orc))) continue; @@ -186,7 +192,8 @@ int orc_create(struct objtool_file *file) struct cfi_state *cfi = alt_group->cfi[i]; if (!cfi) continue; - if (init_orc_entry(&orc, cfi)) + /* errors are reported on the original insn */ + if (init_orc_entry(&orc, cfi, insn)) return -1; if (!memcmp(&prev_orc, &orc, sizeof(orc))) continue; From 18576e45b12a73504659d7fd82955d2aaf99ecd5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:37 +0200 Subject: [PATCH 0672/1056] x86/retpoline: Remove unused replacement symbols commit 4fe79e710d9574a14993f8b4e16b7252da72d5e8 upstream. Now that objtool no longer creates alternatives, these replacement symbols are no longer needed, remove them. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120309.915051744@infradead.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm-prototypes.h | 10 ------- arch/x86/lib/retpoline.S | 42 --------------------------- 2 files changed, 52 deletions(-) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 4cb726c71ed8c6..a28c5cab893d78 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -24,14 +24,4 @@ extern void cmpxchg8b_emu(void); extern asmlinkage void __x86_indirect_thunk_ ## reg (void); #include -#undef GEN -#define GEN(reg) \ - extern asmlinkage void __x86_indirect_alt_call_ ## reg (void); -#include - -#undef GEN -#define GEN(reg) \ - extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void); -#include - #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 9556ff5f4773fd..9cd9d2daf68d2a 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -40,36 +40,6 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) .endm -/* - * This generates .altinstr_replacement symbols for use by objtool. They, - * however, must not actually live in .altinstr_replacement since that will be - * discarded after init, but module alternatives will also reference these - * symbols. - * - * Their names matches the "__x86_indirect_" prefix to mark them as retpolines. - */ -.macro ALT_THUNK reg - - .align 1 - -SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg) - ANNOTATE_RETPOLINE_SAFE -1: call *%\reg -2: .skip 5-(2b-1b), 0x90 -SYM_FUNC_END(__x86_indirect_alt_call_\reg) - -STACK_FRAME_NON_STANDARD(__x86_indirect_alt_call_\reg) - -SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg) - ANNOTATE_RETPOLINE_SAFE -1: jmp *%\reg -2: .skip 5-(2b-1b), 0x90 -SYM_FUNC_END(__x86_indirect_alt_jmp_\reg) - -STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg) - -.endm - /* * Despite being an assembler file we can't just use .irp here * because __KSYM_DEPS__ only uses the C preprocessor and would @@ -92,15 +62,3 @@ STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg) #undef GEN #define GEN(reg) EXPORT_THUNK(reg) #include - -#undef GEN -#define GEN(reg) ALT_THUNK reg -#include - -#undef GEN -#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg) -#include - -#undef GEN -#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg) -#include From 71e578e1bd74d716a5cb136dbc55594dbf4450ef Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:38 +0200 Subject: [PATCH 0673/1056] x86/asm: Fix register order commit a92ede2d584a2e070def59c7e47e6b6f6341c55c upstream. Ensure the register order is correct; this allows for easy translation between register number and trampoline and vice-versa. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120309.978573921@infradead.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/GEN-for-each-reg.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h index 1b07fb102c4ed7..07949102a08d0b 100644 --- a/arch/x86/include/asm/GEN-for-each-reg.h +++ b/arch/x86/include/asm/GEN-for-each-reg.h @@ -1,11 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * These are in machine order; things rely on that. + */ #ifdef CONFIG_64BIT GEN(rax) -GEN(rbx) GEN(rcx) GEN(rdx) +GEN(rbx) +GEN(rsp) +GEN(rbp) GEN(rsi) GEN(rdi) -GEN(rbp) GEN(r8) GEN(r9) GEN(r10) @@ -16,10 +21,11 @@ GEN(r14) GEN(r15) #else GEN(eax) -GEN(ebx) GEN(ecx) GEN(edx) +GEN(ebx) +GEN(esp) +GEN(ebp) GEN(esi) GEN(edi) -GEN(ebp) #endif From 6a6fcb256301a3f2ca29a1c2adda71c6ab272ab7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:39 +0200 Subject: [PATCH 0674/1056] x86/asm: Fixup odd GEN-for-each-reg.h usage commit b6d3d9944bd7c9e8c06994ead3c9952f673f2a66 upstream. Currently GEN-for-each-reg.h usage leaves GEN defined, relying on any subsequent usage to start with #undef, which is rude. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120310.041792350@infradead.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm-prototypes.h | 2 +- arch/x86/lib/retpoline.S | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index a28c5cab893d78..a2bed09d3c1189 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -19,9 +19,9 @@ extern void cmpxchg8b_emu(void); #ifdef CONFIG_RETPOLINE -#undef GEN #define GEN(reg) \ extern asmlinkage void __x86_indirect_thunk_ ## reg (void); #include +#undef GEN #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 9cd9d2daf68d2a..3da2dffacdef0c 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -55,10 +55,10 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) #define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) -#undef GEN #define GEN(reg) THUNK reg #include - #undef GEN + #define GEN(reg) EXPORT_THUNK(reg) #include +#undef GEN From 90ca76c83044cb49a50b31a171a8daa0adcc2c2f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:40 +0200 Subject: [PATCH 0675/1056] x86/retpoline: Move the retpoline thunk declarations to nospec-branch.h commit 6fda8a38865607db739be3e567a2387376222dbd upstream. Because it makes no sense to split the retpoline gunk over multiple headers. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120310.106290934@infradead.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm-prototypes.h | 8 -------- arch/x86/include/asm/nospec-branch.h | 7 +++++++ arch/x86/net/bpf_jit_comp.c | 1 - 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index a2bed09d3c1189..8f80de627c60a8 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -17,11 +17,3 @@ extern void cmpxchg8b_emu(void); #endif -#ifdef CONFIG_RETPOLINE - -#define GEN(reg) \ - extern asmlinkage void __x86_indirect_thunk_ ## reg (void); -#include -#undef GEN - -#endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 9783c6b0888682..280e3e248e065e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -118,6 +119,12 @@ ".popsection\n\t" #ifdef CONFIG_RETPOLINE + +#define GEN(reg) \ + extern asmlinkage void __x86_indirect_thunk_ ## reg (void); +#include +#undef GEN + #ifdef CONFIG_X86_64 /* diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8dca2bcbb0eae3..b0ffb5698408db 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -15,7 +15,6 @@ #include #include #include -#include static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { From 936c8fcb49956843af2be61423a1d0d1f91ecafa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:41 +0200 Subject: [PATCH 0676/1056] x86/retpoline: Create a retpoline thunk array commit 1a6f74429c42a3854980359a758e222005712aee upstream. Stick all the retpolines in a single symbol and have the individual thunks as inner labels, this should guarantee thunk order and layout. Previously there were 16 (or rather 15 without rsp) separate symbols and a toolchain might reasonably expect it could displace them however it liked, with disregard for their relative position. However, now they're part of a larger symbol. Any change to their relative position would disrupt this larger _array symbol and thus not be sound. This is the same reasoning used for data symbols. On their own there is no guarantee about their relative position wrt to one aonther, but we're still able to do arrays because an array as a whole is a single larger symbol. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120310.169659320@infradead.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 8 +++++++- arch/x86/lib/retpoline.S | 14 +++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 280e3e248e065e..f0970412b0cd92 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -12,6 +12,8 @@ #include #include +#define RETPOLINE_THUNK_SIZE 32 + /* * Fill the CPU return stack buffer. * @@ -120,11 +122,15 @@ #ifdef CONFIG_RETPOLINE +typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; + #define GEN(reg) \ - extern asmlinkage void __x86_indirect_thunk_ ## reg (void); + extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; #include #undef GEN +extern retpoline_thunk_t __x86_indirect_thunk_array[]; + #ifdef CONFIG_X86_64 /* diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 3da2dffacdef0c..afbdda539b8010 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -28,16 +28,14 @@ .macro THUNK reg - .align 32 - -SYM_FUNC_START(__x86_indirect_thunk_\reg) + .align RETPOLINE_THUNK_SIZE +SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_EMPTY ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE -SYM_FUNC_END(__x86_indirect_thunk_\reg) - .endm /* @@ -55,10 +53,16 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) #define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) + .align RETPOLINE_THUNK_SIZE +SYM_CODE_START(__x86_indirect_thunk_array) + #define GEN(reg) THUNK reg #include #undef GEN + .align RETPOLINE_THUNK_SIZE +SYM_CODE_END(__x86_indirect_thunk_array) + #define GEN(reg) EXPORT_THUNK(reg) #include #undef GEN From 86900f95fb060ca8ad9062f1905d3ffaf3bac707 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:42 +0200 Subject: [PATCH 0677/1056] x86/alternative: Implement .retpoline_sites support commit 7508500900814d14e2e085cdc4e28142721abbdf upstream. Rewrite retpoline thunk call sites to be indirect calls for spectre_v2=off. This ensures spectre_v2=off is as near to a RETPOLINE=n build as possible. This is the replacement for objtool writing alternative entries to ensure the same and achieves feature-parity with the previous approach. One noteworthy feature is that it relies on the thunks to be in machine order to compute the register index. Specifically, this does not yet address the Jcc __x86_indirect_thunk_* calls generated by clang, a future patch will add this. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120310.232495794@infradead.org [cascardo: small conflict fixup at arch/x86/kernel/module.c] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/um/kernel/um_arch.c | 4 + arch/x86/include/asm/alternative.h | 1 + arch/x86/kernel/alternative.c | 141 ++++++++++++++++++++++++++++- arch/x86/kernel/module.c | 9 +- 4 files changed, 150 insertions(+), 5 deletions(-) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index a149a5e9a16a17..54447690de1159 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -421,6 +421,10 @@ void __init check_bugs(void) os_check_bugs(); } +void apply_retpolines(s32 *start, s32 *end) +{ +} + void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { } diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index a3c2315aca121d..58eee640283254 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -75,6 +75,7 @@ extern int alternatives_patched; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +extern void apply_retpolines(s32 *start, s32 *end); struct module; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ae0f718b8ebb53..8dff99e8682788 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -29,6 +29,7 @@ #include #include #include +#include int __read_mostly alternatives_patched; @@ -113,6 +114,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) } } +extern s32 __retpoline_sites[], __retpoline_sites_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; void text_poke_early(void *addr, const void *opcode, size_t len); @@ -221,7 +223,7 @@ static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off) * "noinline" to cause control flow change and thus invalidate I$ and * cause refetch after modification. */ -static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) +static void __init_or_module noinline optimize_nops(u8 *instr, size_t len) { struct insn insn; int i = 0; @@ -239,11 +241,11 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins * optimized. */ if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) - i += optimize_nops_range(instr, a->instrlen, i); + i += optimize_nops_range(instr, len, i); else i += insn.length; - if (i >= a->instrlen) + if (i >= len) return; } } @@ -331,10 +333,135 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, text_poke_early(instr, insn_buff, insn_buff_sz); next: - optimize_nops(a, instr); + optimize_nops(instr, a->instrlen); } } +#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION) + +/* + * CALL/JMP *%\reg + */ +static int emit_indirect(int op, int reg, u8 *bytes) +{ + int i = 0; + u8 modrm; + + switch (op) { + case CALL_INSN_OPCODE: + modrm = 0x10; /* Reg = 2; CALL r/m */ + break; + + case JMP32_INSN_OPCODE: + modrm = 0x20; /* Reg = 4; JMP r/m */ + break; + + default: + WARN_ON_ONCE(1); + return -1; + } + + if (reg >= 8) { + bytes[i++] = 0x41; /* REX.B prefix */ + reg -= 8; + } + + modrm |= 0xc0; /* Mod = 3 */ + modrm += reg; + + bytes[i++] = 0xff; /* opcode */ + bytes[i++] = modrm; + + return i; +} + +/* + * Rewrite the compiler generated retpoline thunk calls. + * + * For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate + * indirect instructions, avoiding the extra indirection. + * + * For example, convert: + * + * CALL __x86_indirect_thunk_\reg + * + * into: + * + * CALL *%\reg + * + */ +static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) +{ + retpoline_thunk_t *target; + int reg, i = 0; + + target = addr + insn->length + insn->immediate.value; + reg = target - __x86_indirect_thunk_array; + + if (WARN_ON_ONCE(reg & ~0xf)) + return -1; + + /* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */ + BUG_ON(reg == 4); + + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) + return -1; + + i = emit_indirect(insn->opcode.bytes[0], reg, bytes); + if (i < 0) + return i; + + for (; i < insn->length;) + bytes[i++] = BYTES_NOP1; + + return i; +} + +/* + * Generated by 'objtool --retpoline'. + */ +void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + struct insn insn; + int len, ret; + u8 bytes[16]; + u8 op1, op2; + + ret = insn_decode_kernel(&insn, addr); + if (WARN_ON_ONCE(ret < 0)) + continue; + + op1 = insn.opcode.bytes[0]; + op2 = insn.opcode.bytes[1]; + + switch (op1) { + case CALL_INSN_OPCODE: + case JMP32_INSN_OPCODE: + break; + + default: + WARN_ON_ONCE(1); + continue; + } + + len = patch_retpoline(addr, &insn, bytes); + if (len == insn.length) { + optimize_nops(bytes, len); + text_poke_early(addr, bytes, len); + } + } +} + +#else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */ + +void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } + +#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */ + #ifdef CONFIG_SMP static void alternatives_smp_lock(const s32 *start, const s32 *end, u8 *text, u8 *text_end) @@ -642,6 +769,12 @@ void __init alternative_instructions(void) */ apply_paravirt(__parainstructions, __parainstructions_end); + /* + * Rewrite the retpolines, must be done before alternatives since + * those can rewrite the retpoline thunks. + */ + apply_retpolines(__retpoline_sites, __retpoline_sites_end); + /* * Then patch alternatives, such that those paravirt calls that are in * alternatives can be overwritten by their immediate fragments. diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 32e546e4162963..96d7c27b7093db 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -252,7 +252,8 @@ int module_finalize(const Elf_Ehdr *hdr, struct module *me) { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, - *para = NULL, *orc = NULL, *orc_ip = NULL; + *para = NULL, *orc = NULL, *orc_ip = NULL, + *retpolines = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -268,6 +269,8 @@ int module_finalize(const Elf_Ehdr *hdr, orc = s; if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name)) orc_ip = s; + if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) + retpolines = s; } /* @@ -278,6 +281,10 @@ int module_finalize(const Elf_Ehdr *hdr, void *pseg = (void *)para->sh_addr; apply_paravirt(pseg, pseg + para->sh_size); } + if (retpolines) { + void *rseg = (void *)retpolines->sh_addr; + apply_retpolines(rseg, rseg + retpolines->sh_size); + } if (alt) { /* patch .altinstructions */ void *aseg = (void *)alt->sh_addr; From a40925e2efd80d22d3ba8b4477caece062764e4d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:43 +0200 Subject: [PATCH 0678/1056] x86/alternative: Handle Jcc __x86_indirect_thunk_\reg commit 2f0cbb2a8e5bbf101e9de118fc0eb168111a5e1e upstream. Handle the rare cases where the compiler (clang) does an indirect conditional tail-call using: Jcc __x86_indirect_thunk_\reg For the !RETPOLINE case this can be rewritten to fit the original (6 byte) instruction like: Jncc.d8 1f JMP *%\reg NOP 1: Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120310.296470217@infradead.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/alternative.c | 40 +++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 8dff99e8682788..b4ea0af7299f47 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -393,7 +393,8 @@ static int emit_indirect(int op, int reg, u8 *bytes) static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) { retpoline_thunk_t *target; - int reg, i = 0; + int reg, ret, i = 0; + u8 op, cc; target = addr + insn->length + insn->immediate.value; reg = target - __x86_indirect_thunk_array; @@ -407,9 +408,36 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) return -1; - i = emit_indirect(insn->opcode.bytes[0], reg, bytes); - if (i < 0) - return i; + op = insn->opcode.bytes[0]; + + /* + * Convert: + * + * Jcc.d32 __x86_indirect_thunk_\reg + * + * into: + * + * Jncc.d8 1f + * JMP *%\reg + * NOP + * 1: + */ + /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */ + if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) { + cc = insn->opcode.bytes[1] & 0xf; + cc ^= 1; /* invert condition */ + + bytes[i++] = 0x70 + cc; /* Jcc.d8 */ + bytes[i++] = insn->length - 2; /* sizeof(Jcc.d8) == 2 */ + + /* Continue as if: JMP.d32 __x86_indirect_thunk_\reg */ + op = JMP32_INSN_OPCODE; + } + + ret = emit_indirect(op, reg, bytes + i); + if (ret < 0) + return ret; + i += ret; for (; i < insn->length;) bytes[i++] = BYTES_NOP1; @@ -443,6 +471,10 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) case JMP32_INSN_OPCODE: break; + case 0x0f: /* escape */ + if (op2 >= 0x80 && op2 <= 0x8f) + break; + fallthrough; default: WARN_ON_ONCE(1); continue; From b93a0a740bcaebc8b597d6fabc98f2340b21da67 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:44 +0200 Subject: [PATCH 0679/1056] x86/alternative: Try inline spectre_v2=retpoline,amd commit bbe2df3f6b6da7848398d55b1311d58a16ec21e4 upstream. Try and replace retpoline thunk calls with: LFENCE CALL *%\reg for spectre_v2=retpoline,amd. Specifically, the sequence above is 5 bytes for the low 8 registers, but 6 bytes for the high 8 registers. This means that unless the compilers prefix stuff the call with higher registers this replacement will fail. Luckily GCC strongly favours RAX for the indirect calls and most (95%+ for defconfig-x86_64) will be converted. OTOH clang strongly favours R11 and almost nothing gets converted. Note: it will also generate a correct replacement for the Jcc.d32 case, except unless the compilers start to prefix stuff that, it'll never fit. Specifically: Jncc.d8 1f LFENCE JMP *%\reg 1: is 7-8 bytes long, where the original instruction in unpadded form is only 6 bytes. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120310.359986601@infradead.org [cascardo: RETPOLINE_AMD was renamed to RETPOLINE_LFENCE] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/alternative.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index b4ea0af7299f47..63f466153a8850 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -389,6 +389,7 @@ static int emit_indirect(int op, int reg, u8 *bytes) * * CALL *%\reg * + * It also tries to inline spectre_v2=retpoline,amd when size permits. */ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) { @@ -405,7 +406,8 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) /* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */ BUG_ON(reg == 4); - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && + !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) return -1; op = insn->opcode.bytes[0]; @@ -418,8 +420,9 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) * into: * * Jncc.d8 1f + * [ LFENCE ] * JMP *%\reg - * NOP + * [ NOP ] * 1: */ /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */ @@ -434,6 +437,15 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) op = JMP32_INSN_OPCODE; } + /* + * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE. + */ + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { + bytes[i++] = 0x0f; + bytes[i++] = 0xae; + bytes[i++] = 0xe8; /* LFENCE */ + } + ret = emit_indirect(op, reg, bytes + i); if (ret < 0) return ret; From 1c2e7b40752a7a3290bcf3c498d73353a02323ad Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:45 +0200 Subject: [PATCH 0680/1056] x86/alternative: Add debug prints to apply_retpolines() commit d4b5a5c993009ffeb5febe3b701da3faab6adb96 upstream. Make sure we can see the text changes when booting with 'debug-alternative'. Example output: [ ] SMP alternatives: retpoline at: __traceiter_initcall_level+0x1f/0x30 (ffffffff8100066f) len: 5 to: __x86_indirect_thunk_rax+0x0/0x20 [ ] SMP alternatives: ffffffff82603e58: [2:5) optimized NOPs: ff d0 0f 1f 00 [ ] SMP alternatives: ffffffff8100066f: orig: e8 cc 30 00 01 [ ] SMP alternatives: ffffffff8100066f: repl: ff d0 0f 1f 00 Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120310.422273830@infradead.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/alternative.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 63f466153a8850..b9d062597954ee 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -492,9 +492,15 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) continue; } + DPRINTK("retpoline at: %pS (%px) len: %d to: %pS", + addr, addr, insn.length, + addr + insn.length + insn.immediate.value); + len = patch_retpoline(addr, &insn, bytes); if (len == insn.length) { optimize_nops(bytes, len); + DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); + DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); text_poke_early(addr, bytes, len); } } From 74b5a9f8edfeb7458653a673fa49a7c505acdf02 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:47 +0200 Subject: [PATCH 0681/1056] bpf,x86: Simplify computing label offsets commit dceba0817ca329868a15e2e1dd46eb6340b69206 upstream. Take an idea from the 32bit JIT, which uses the multi-pass nature of the JIT to compute the instruction offsets on a prior pass in order to compute the relative jump offsets on a later pass. Application to the x86_64 JIT is slightly more involved because the offsets depend on program variables (such as callee_regs_used and stack_depth) and hence the computed offsets need to be kept in the context of the JIT. This removes, IMO quite fragile, code that hard-codes the offsets and tries to compute the length of variable parts of it. Convert both emit_bpf_tail_call_*() functions which have an out: label at the end. Additionally emit_bpt_tail_call_direct() also has a poke table entry, for which it computes the offset from the end (and thus already relies on the previous pass to have computed addrs[i]), also convert this to be a forward based offset. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Alexei Starovoitov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120310.552304864@infradead.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/net/bpf_jit_comp.c | 123 ++++++++++++------------------------ 1 file changed, 42 insertions(+), 81 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b0ffb5698408db..04758d30777219 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -224,6 +224,14 @@ static void jit_fill_hole(void *area, unsigned int size) struct jit_context { int cleanup_addr; /* Epilogue code offset */ + + /* + * Program specific offsets of labels in the code; these rely on the + * JIT doing at least 2 passes, recording the position on the first + * pass, only to generate the correct offset on the second pass. + */ + int tail_call_direct_label; + int tail_call_indirect_label; }; /* Maximum number of bytes emitted while JITing one eBPF insn */ @@ -379,22 +387,6 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true); } -static int get_pop_bytes(bool *callee_regs_used) -{ - int bytes = 0; - - if (callee_regs_used[3]) - bytes += 2; - if (callee_regs_used[2]) - bytes += 2; - if (callee_regs_used[1]) - bytes += 2; - if (callee_regs_used[0]) - bytes += 1; - - return bytes; -} - /* * Generate the following code: * @@ -410,29 +402,12 @@ static int get_pop_bytes(bool *callee_regs_used) * out: */ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, - u32 stack_depth) + u32 stack_depth, u8 *ip, + struct jit_context *ctx) { int tcc_off = -4 - round_up(stack_depth, 8); - u8 *prog = *pprog; - int pop_bytes = 0; - int off1 = 42; - int off2 = 31; - int off3 = 9; - - /* count the additional bytes used for popping callee regs from stack - * that need to be taken into account for each of the offsets that - * are used for bailing out of the tail call - */ - pop_bytes = get_pop_bytes(callee_regs_used); - off1 += pop_bytes; - off2 += pop_bytes; - off3 += pop_bytes; - - if (stack_depth) { - off1 += 7; - off2 += 7; - off3 += 7; - } + u8 *prog = *pprog, *start = *pprog; + int offset; /* * rdi - pointer to ctx @@ -447,8 +422,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, EMIT2(0x89, 0xD2); /* mov edx, edx */ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ offsetof(struct bpf_array, map.max_entries)); -#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */ - EMIT2(X86_JBE, OFFSET1); /* jbe out */ + + offset = ctx->tail_call_indirect_label - (prog + 2 - start); + EMIT2(X86_JBE, offset); /* jbe out */ /* * if (tail_call_cnt > MAX_TAIL_CALL_CNT) @@ -456,8 +432,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, */ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE) - EMIT2(X86_JA, OFFSET2); /* ja out */ + + offset = ctx->tail_call_indirect_label - (prog + 2 - start); + EMIT2(X86_JA, offset); /* ja out */ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ @@ -470,12 +447,11 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, * goto out; */ EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */ -#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE) - EMIT2(X86_JE, OFFSET3); /* je out */ - *pprog = prog; - pop_callee_regs(pprog, callee_regs_used); - prog = *pprog; + offset = ctx->tail_call_indirect_label - (prog + 2 - start); + EMIT2(X86_JE, offset); /* je out */ + + pop_callee_regs(&prog, callee_regs_used); EMIT1(0x58); /* pop rax */ if (stack_depth) @@ -495,38 +471,18 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, RETPOLINE_RCX_BPF_JIT(); /* out: */ + ctx->tail_call_indirect_label = prog - start; *pprog = prog; } static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, - u8 **pprog, int addr, u8 *image, - bool *callee_regs_used, u32 stack_depth) + u8 **pprog, u8 *ip, + bool *callee_regs_used, u32 stack_depth, + struct jit_context *ctx) { int tcc_off = -4 - round_up(stack_depth, 8); - u8 *prog = *pprog; - int pop_bytes = 0; - int off1 = 20; - int poke_off; - - /* count the additional bytes used for popping callee regs to stack - * that need to be taken into account for jump offset that is used for - * bailing out from of the tail call when limit is reached - */ - pop_bytes = get_pop_bytes(callee_regs_used); - off1 += pop_bytes; - - /* - * total bytes for: - * - nop5/ jmpq $off - * - pop callee regs - * - sub rsp, $val if depth > 0 - * - pop rax - */ - poke_off = X86_PATCH_SIZE + pop_bytes + 1; - if (stack_depth) { - poke_off += 7; - off1 += 7; - } + u8 *prog = *pprog, *start = *pprog; + int offset; /* * if (tail_call_cnt > MAX_TAIL_CALL_CNT) @@ -534,28 +490,30 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, */ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ - EMIT2(X86_JA, off1); /* ja out */ + + offset = ctx->tail_call_direct_label - (prog + 2 - start); + EMIT2(X86_JA, offset); /* ja out */ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ - poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE); + poke->tailcall_bypass = ip + (prog - start); poke->adj_off = X86_TAIL_CALL_OFFSET; - poke->tailcall_target = image + (addr - X86_PATCH_SIZE); + poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE; poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE; emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE, poke->tailcall_bypass); - *pprog = prog; - pop_callee_regs(pprog, callee_regs_used); - prog = *pprog; + pop_callee_regs(&prog, callee_regs_used); EMIT1(0x58); /* pop rax */ if (stack_depth) EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8)); memcpy(prog, x86_nops[5], X86_PATCH_SIZE); prog += X86_PATCH_SIZE; + /* out: */ + ctx->tail_call_direct_label = prog - start; *pprog = prog; } @@ -1453,13 +1411,16 @@ st: if (is_imm8(insn->off)) case BPF_JMP | BPF_TAIL_CALL: if (imm32) emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1], - &prog, addrs[i], image, + &prog, image + addrs[i - 1], callee_regs_used, - bpf_prog->aux->stack_depth); + bpf_prog->aux->stack_depth, + ctx); else emit_bpf_tail_call_indirect(&prog, callee_regs_used, - bpf_prog->aux->stack_depth); + bpf_prog->aux->stack_depth, + image + addrs[i - 1], + ctx); break; /* cond jump */ From 32b2cd6060e7ab2984e750d0fbe79e1655cc7cbe Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:48 +0200 Subject: [PATCH 0682/1056] bpf,x86: Respect X86_FEATURE_RETPOLINE* commit 87c87ecd00c54ecd677798cb49ef27329e0fab41 upstream. Current BPF codegen doesn't respect X86_FEATURE_RETPOLINE* flags and unconditionally emits a thunk call, this is sub-optimal and doesn't match the regular, compiler generated, code. Update the i386 JIT to emit code equal to what the compiler emits for the regular kernel text (IOW. a plain THUNK call). Update the x86_64 JIT to emit code similar to the result of compiler and kernel rewrites as according to X86_FEATURE_RETPOLINE* flags. Inlining RETPOLINE_AMD (lfence; jmp *%reg) and !RETPOLINE (jmp *%reg), while doing a THUNK call for RETPOLINE. This removes the hard-coded retpoline thunks and shrinks the generated code. Leaving a single retpoline thunk definition in the kernel. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Alexei Starovoitov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120310.614772675@infradead.org [cascardo: RETPOLINE_AMD was renamed to RETPOLINE_LFENCE] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 59 ---------------------------- arch/x86/net/bpf_jit_comp.c | 46 +++++++++++----------- arch/x86/net/bpf_jit_comp32.c | 22 +++++++++-- 3 files changed, 41 insertions(+), 86 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index f0970412b0cd92..da251a5645b0ec 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -320,63 +320,4 @@ static inline void mds_idle_clear_cpu_buffers(void) #endif /* __ASSEMBLY__ */ -/* - * Below is used in the eBPF JIT compiler and emits the byte sequence - * for the following assembly: - * - * With retpolines configured: - * - * callq do_rop - * spec_trap: - * pause - * lfence - * jmp spec_trap - * do_rop: - * mov %rcx,(%rsp) for x86_64 - * mov %edx,(%esp) for x86_32 - * retq - * - * Without retpolines configured: - * - * jmp *%rcx for x86_64 - * jmp *%edx for x86_32 - */ -#ifdef CONFIG_RETPOLINE -# ifdef CONFIG_X86_64 -# define RETPOLINE_RCX_BPF_JIT_SIZE 17 -# define RETPOLINE_RCX_BPF_JIT() \ -do { \ - EMIT1_off32(0xE8, 7); /* callq do_rop */ \ - /* spec_trap: */ \ - EMIT2(0xF3, 0x90); /* pause */ \ - EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ - EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ - /* do_rop: */ \ - EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \ - EMIT1(0xC3); /* retq */ \ -} while (0) -# else /* !CONFIG_X86_64 */ -# define RETPOLINE_EDX_BPF_JIT() \ -do { \ - EMIT1_off32(0xE8, 7); /* call do_rop */ \ - /* spec_trap: */ \ - EMIT2(0xF3, 0x90); /* pause */ \ - EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ - EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ - /* do_rop: */ \ - EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \ - EMIT1(0xC3); /* ret */ \ -} while (0) -# endif -#else /* !CONFIG_RETPOLINE */ -# ifdef CONFIG_X86_64 -# define RETPOLINE_RCX_BPF_JIT_SIZE 2 -# define RETPOLINE_RCX_BPF_JIT() \ - EMIT2(0xFF, 0xE1); /* jmp *%rcx */ -# else /* !CONFIG_X86_64 */ -# define RETPOLINE_EDX_BPF_JIT() \ - EMIT2(0xFF, 0xE2) /* jmp *%edx */ -# endif -#endif - #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 04758d30777219..c9045985183c08 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -387,6 +387,25 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true); } +#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8) + +static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) +{ + u8 *prog = *pprog; + +#ifdef CONFIG_RETPOLINE + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { + EMIT_LFENCE(); + EMIT2(0xFF, 0xE0 + reg); + } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { + emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); + } else +#endif + EMIT2(0xFF, 0xE0 + reg); + + *pprog = prog; +} + /* * Generate the following code: * @@ -468,7 +487,7 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, * rdi == ctx (1st arg) * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET */ - RETPOLINE_RCX_BPF_JIT(); + emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start)); /* out: */ ctx->tail_call_indirect_label = prog - start; @@ -1185,8 +1204,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, /* speculation barrier */ case BPF_ST | BPF_NOSPEC: if (boot_cpu_has(X86_FEATURE_XMM2)) - /* Emit 'lfence' */ - EMIT3(0x0F, 0xAE, 0xE8); + EMIT_LFENCE(); break; /* ST: *(u8*)(dst_reg + off) = imm */ @@ -2122,24 +2140,6 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i return ret; } -static int emit_fallback_jump(u8 **pprog) -{ - u8 *prog = *pprog; - int err = 0; - -#ifdef CONFIG_RETPOLINE - /* Note that this assumes the the compiler uses external - * thunks for indirect calls. Both clang and GCC use the same - * naming convention for external thunks. - */ - err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog); -#else - EMIT2(0xFF, 0xE2); /* jmp rdx */ -#endif - *pprog = prog; - return err; -} - static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) { u8 *jg_reloc, *prog = *pprog; @@ -2161,9 +2161,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) if (err) return err; - err = emit_fallback_jump(&prog); /* jmp thunk/indirect */ - if (err) - return err; + emit_indirect_jump(&prog, 2 /* rdx */, prog); *pprog = prog; return 0; diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 3bfda5f502cb85..da9b7cfa463297 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -15,6 +15,7 @@ #include #include #include +#include #include /* @@ -1267,6 +1268,21 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth) *pprog = prog; } +static int emit_jmp_edx(u8 **pprog, u8 *ip) +{ + u8 *prog = *pprog; + int cnt = 0; + +#ifdef CONFIG_RETPOLINE + EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5)); +#else + EMIT2(0xFF, 0xE2); +#endif + *pprog = prog; + + return cnt; +} + /* * Generate the following code: * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... @@ -1280,7 +1296,7 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth) * goto *(prog->bpf_func + prologue_size); * out: */ -static void emit_bpf_tail_call(u8 **pprog) +static void emit_bpf_tail_call(u8 **pprog, u8 *ip) { u8 *prog = *pprog; int cnt = 0; @@ -1362,7 +1378,7 @@ static void emit_bpf_tail_call(u8 **pprog) * eax == ctx (1st arg) * edx == prog->bpf_func + prologue_size */ - RETPOLINE_EDX_BPF_JIT(); + cnt += emit_jmp_edx(&prog, ip + cnt); if (jmp_label1 == -1) jmp_label1 = cnt; @@ -2122,7 +2138,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, break; } case BPF_JMP | BPF_TAIL_CALL: - emit_bpf_tail_call(&prog); + emit_bpf_tail_call(&prog, image + addrs[i - 1]); break; /* cond jump */ From 40265bcd1bd3a00e13aa51330aa2b5a4b6aaf338 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:14 +0100 Subject: [PATCH 0683/1056] objtool: Default ignore INT3 for unreachable commit 1ffbe4e935f9b7308615c75be990aec07464d1e7 upstream. Ignore all INT3 instructions for unreachable code warnings, similar to NOP. This allows using INT3 for various paddings instead of NOPs. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154317.343312938@infradead.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a8320f0df786bd..d3e0ec9fd2e0c3 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2965,9 +2965,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, switch (insn->type) { case INSN_RETURN: - if (next_insn && next_insn->type == INSN_TRAP) { - next_insn->ignore = true; - } else if (sls && !insn->retpoline_safe) { + if (sls && !insn->retpoline_safe && + next_insn && next_insn->type != INSN_TRAP) { WARN_FUNC("missing int3 after ret", insn->sec, insn->offset); } @@ -3014,9 +3013,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_JUMP_DYNAMIC: - if (next_insn && next_insn->type == INSN_TRAP) { - next_insn->ignore = true; - } else if (sls && !insn->retpoline_safe) { + if (sls && !insn->retpoline_safe && + next_insn && next_insn->type != INSN_TRAP) { WARN_FUNC("missing int3 after indirect jump", insn->sec, insn->offset); } @@ -3187,7 +3185,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio int i; struct instruction *prev_insn; - if (insn->ignore || insn->type == INSN_NOP) + if (insn->ignore || insn->type == INSN_NOP || insn->type == INSN_TRAP) return true; /* From 6834878ace6e09c332f4c2bc5df2fb33a9e82126 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 6 May 2022 14:14:35 +0200 Subject: [PATCH 0684/1056] x86/entry: Remove skip_r11rcx commit 1b331eeea7b8676fc5dbdf80d0a07e41be226177 upstream. Yes, r11 and rcx have been restored previously, but since they're being popped anyway (into rsi) might as well pop them into their own regs -- setting them to the value they already are. Less magical code. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220506121631.365070674@infradead.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/calling.h | 10 +--------- arch/x86/entry/entry_64.S | 3 +-- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index a4c061fb7c6ea0..2b12c691fbde4c 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -119,27 +119,19 @@ For 32-bit we have the following conventions - kernel is built with CLEAR_REGS .endm -.macro POP_REGS pop_rdi=1 skip_r11rcx=0 +.macro POP_REGS pop_rdi=1 popq %r15 popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx - .if \skip_r11rcx - popq %rsi - .else popq %r11 - .endif popq %r10 popq %r9 popq %r8 popq %rax - .if \skip_r11rcx - popq %rsi - .else popq %rcx - .endif popq %rdx popq %rsi .if \pop_rdi diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 6ff78841b975c0..18c57d829b598d 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -189,8 +189,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) * perf profiles. Nothing jumps here. */ syscall_return_via_sysret: - /* rcx and r11 are already restored (see code above) */ - POP_REGS pop_rdi=0 skip_r11rcx=1 + POP_REGS pop_rdi=0 /* * Now all regs are restored except RSP and RDI. From 2cd972ae337f4ff40b31513b324ff846a0247437 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 1 Jul 2022 11:21:20 -0300 Subject: [PATCH 0685/1056] x86/realmode: build with -D__DISABLE_EXPORTS Commit 156ff4a544ae ("x86/ibt: Base IBT bits") added this option when building realmode in order to disable IBT there. This is also needed in order to disable return thunks. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 300227818206bc..9c09bbd390cecf 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -24,7 +24,7 @@ endif # How to compile the 16-bit code. Note we always compile for -march=i386; # that way we can complain to the user if the CPU is insufficient. -REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING \ +REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none) From ccb25d7db1a29bc251692be745b000e6f0754048 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:32 +0200 Subject: [PATCH 0686/1056] x86/kvm/vmx: Make noinstr clean commit 742ab6df974ae8384a2dd213db1a3a06cf6d8936 upstream. The recent mmio_stale_data fixes broke the noinstr constraints: vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x15b: call to wrmsrl.constprop.0() leaves .noinstr.text section vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x1bf: call to kvm_arch_has_assigned_device() leaves .noinstr.text section make it all happy again. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/vmx.c | 6 +++--- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 1533ab7b579e58..54724f520556f5 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -380,9 +380,9 @@ static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) if (!vmx->disable_fb_clear) return; - rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL); msr |= FB_CLEAR_DIS; - wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); /* Cache the MSR value to avoid reading it later */ vmx->msr_ia32_mcu_opt_ctrl = msr; } @@ -393,7 +393,7 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) return; vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; - wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); + native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); } static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 732c3f2f8ded4d..4525d0b25a432d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12179,9 +12179,9 @@ void kvm_arch_end_assignment(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); -bool kvm_arch_has_assigned_device(struct kvm *kvm) +bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm) { - return atomic_read(&kvm->arch.assigned_device_count); + return arch_atomic_read(&kvm->arch.assigned_device_count); } EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 926f60499c347c..fb70dd4ff3b605 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1233,7 +1233,7 @@ static inline void kvm_arch_end_assignment(struct kvm *kvm) { } -static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) +static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) { return false; } From eee4f31fa2ebc5eb88f3b7320b909dc580c7de47 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:33 +0200 Subject: [PATCH 0687/1056] x86/cpufeatures: Move RETPOLINE flags to word 11 commit a883d624aed463c84c22596006e5a96f5b44db31 upstream. In order to extend the RETPOLINE features to 4, move them to word 11 where there is still room. This mostly keeps DISABLE_RETPOLINE simple. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b258efa8bf434f..57dc9f822e8b24 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -203,8 +203,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ /* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ +/* FREE! ( 7*32+12) */ +/* FREE! ( 7*32+13) */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ @@ -294,6 +294,10 @@ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ +/* FREE! (11*32+10) */ +/* FREE! (11*32+11) */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ From 545b45c36a2f5f217d059478f29cefa9f1afe26c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:34 +0200 Subject: [PATCH 0688/1056] x86/retpoline: Cleanup some #ifdefery commit 369ae6ffc41a3c1137cab697635a84d0cc7cdcea upstream. On it's own not much of a cleanup but it prepares for more/similar code. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: conflict fixup because of DISABLE_ENQCMD] [cascardo: no changes at nospec-branch.h and bpf_jit_comp.c] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/disabled-features.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 8f28fafa98b32e..65e3ab5c3e2a12 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -56,6 +56,13 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif +#ifdef CONFIG_RETPOLINE +# define DISABLE_RETPOLINE 0 +#else +# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) +#endif + /* Force disable because it's broken beyond repair */ #define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) @@ -79,7 +86,7 @@ #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX) #define DISABLED_MASK10 0 -#define DISABLED_MASK11 0 +#define DISABLED_MASK11 (DISABLE_RETPOLINE) #define DISABLED_MASK12 0 #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 From bb81f3ac69f854c99444a55f2b4735ba8875732a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:35 +0200 Subject: [PATCH 0689/1056] x86/retpoline: Swizzle retpoline thunk commit 00e1533325fd1fb5459229fe37f235462649f668 upstream. Put the actual retpoline thunk as the original code so that it can become more complicated. Specifically, it allows RET to be a JMP, which can't be .altinstr_replacement since that doesn't do relocations (except for the very first instruction). Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/retpoline.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index afbdda539b8010..c013241d9188be 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -32,9 +32,9 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) UNWIND_HINT_EMPTY - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ - __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE + ALTERNATIVE_2 __stringify(RETPOLINE \reg), \ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \ + __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE) .endm From 023a2b07d5c3436427d7b902d7cddc16469027ba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:36 +0200 Subject: [PATCH 0690/1056] x86/retpoline: Use -mfunction-return commit 0b53c374b9eff2255a386f1f1cfb9a928e52a5ae upstream. Utilize -mfunction-return=thunk-extern when available to have the compiler replace RET instructions with direct JMPs to the symbol __x86_return_thunk. This does not affect assembler (.S) sources, only C sources. -mfunction-return=thunk-extern has been available since gcc 7.3 and clang 15. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Nick Desaulniers Reviewed-by: Josh Poimboeuf Tested-by: Nick Desaulniers Signed-off-by: Borislav Petkov [cascardo: RETPOLINE_CFLAGS is at Makefile] [cascardo: remove ANNOTATE_NOENDBR from __x86_return_thunk] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 ++ arch/x86/include/asm/nospec-branch.h | 2 ++ arch/x86/lib/retpoline.S | 12 ++++++++++++ 3 files changed, 16 insertions(+) diff --git a/Makefile b/Makefile index 2ce44168b1b543..64190738e71357 100644 --- a/Makefile +++ b/Makefile @@ -687,11 +687,13 @@ endif ifdef CONFIG_CC_IS_GCC RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) +RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) endif ifdef CONFIG_CC_IS_CLANG RETPOLINE_CFLAGS := -mretpoline-external-thunk RETPOLINE_VDSO_CFLAGS := -mretpoline +RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) endif export RETPOLINE_CFLAGS export RETPOLINE_VDSO_CFLAGS diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index da251a5645b0ec..2b0d71f00c29c7 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -120,6 +120,8 @@ _ASM_PTR " 999b\n\t" \ ".popsection\n\t" +extern void __x86_return_thunk(void); + #ifdef CONFIG_RETPOLINE typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index c013241d9188be..01667ea9da0204 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -66,3 +66,15 @@ SYM_CODE_END(__x86_indirect_thunk_array) #define GEN(reg) EXPORT_THUNK(reg) #include #undef GEN + +/* + * This function name is magical and is used by -mfunction-return=thunk-extern + * for the compiler to generate JMPs to it. + */ +SYM_CODE_START(__x86_return_thunk) + UNWIND_HINT_EMPTY + ret + int3 +SYM_CODE_END(__x86_return_thunk) + +__EXPORT_THUNK(__x86_return_thunk) From 7bf553d9eebd6bdf70e10a2fc3a7fa6dd482347e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:37 +0200 Subject: [PATCH 0691/1056] x86: Undo return-thunk damage commit 15e67227c49a57837108acfe1c80570e1bd9f962 upstream. Introduce X86_FEATURE_RETHUNK for those afflicted with needing this. [ bp: Do only INT3 padding - simpler. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: CONFIG_STACK_VALIDATION vs CONFIG_OBJTOOL] [cascardo: no IBT support] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/alternative.h | 1 + arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/disabled-features.h | 3 +- arch/x86/kernel/alternative.c | 60 ++++++++++++++++++++++++ arch/x86/kernel/module.c | 8 +++- arch/x86/kernel/vmlinux.lds.S | 7 +++ 6 files changed, 78 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 58eee640283254..a364971967c40e 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -76,6 +76,7 @@ extern int alternatives_patched; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_returns(s32 *start, s32 *end); struct module; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 57dc9f822e8b24..b1fd846f121edf 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -298,6 +298,7 @@ /* FREE! (11*32+11) */ #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 65e3ab5c3e2a12..d7c6e648ff3892 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -60,7 +60,8 @@ # define DISABLE_RETPOLINE 0 #else # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ - (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ + (1 << (X86_FEATURE_RETHUNK & 31))) #endif /* Force disable because it's broken beyond repair */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index b9d062597954ee..aa0cadfba2556a 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -115,6 +115,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) } extern s32 __retpoline_sites[], __retpoline_sites_end[]; +extern s32 __return_sites[], __return_sites_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; void text_poke_early(void *addr, const void *opcode, size_t len); @@ -506,9 +507,67 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) } } +/* + * Rewrite the compiler generated return thunk tail-calls. + * + * For example, convert: + * + * JMP __x86_return_thunk + * + * into: + * + * RET + */ +static int patch_return(void *addr, struct insn *insn, u8 *bytes) +{ + int i = 0; + + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + return -1; + + bytes[i++] = RET_INSN_OPCODE; + + for (; i < insn->length;) + bytes[i++] = INT3_INSN_OPCODE; + + return i; +} + +void __init_or_module noinline apply_returns(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + struct insn insn; + int len, ret; + u8 bytes[16]; + u8 op1; + + ret = insn_decode_kernel(&insn, addr); + if (WARN_ON_ONCE(ret < 0)) + continue; + + op1 = insn.opcode.bytes[0]; + if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE)) + continue; + + DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", + addr, addr, insn.length, + addr + insn.length + insn.immediate.value); + + len = patch_return(addr, &insn, bytes); + if (len == insn.length) { + DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); + DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); + text_poke_early(addr, bytes, len); + } + } +} #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */ @@ -824,6 +883,7 @@ void __init alternative_instructions(void) * those can rewrite the retpoline thunks. */ apply_retpolines(__retpoline_sites, __retpoline_sites_end); + apply_returns(__return_sites, __return_sites_end); /* * Then patch alternatives, such that those paravirt calls that are in diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 96d7c27b7093db..06b53ea940bf60 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr, { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL, - *retpolines = NULL; + *retpolines = NULL, *returns = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr, orc_ip = s; if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) retpolines = s; + if (!strcmp(".return_sites", secstrings + s->sh_name)) + returns = s; } /* @@ -285,6 +287,10 @@ int module_finalize(const Elf_Ehdr *hdr, void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); } + if (returns) { + void *rseg = (void *)returns->sh_addr; + apply_returns(rseg, rseg + returns->sh_size); + } if (alt) { /* patch .altinstructions */ void *aseg = (void *)alt->sh_addr; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3d6dc12d198f7c..a99d016df56f6a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -284,6 +284,13 @@ SECTIONS *(.retpoline_sites) __retpoline_sites_end = .; } + + . = ALIGN(8); + .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) { + __return_sites = .; + *(.return_sites) + __return_sites_end = .; + } #endif /* From 1920e4be8a975f769a9c2d2e77a1f1a02c88b58a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:38 +0200 Subject: [PATCH 0692/1056] x86,objtool: Create .return_sites commit d9e9d2300681d68a775c28de6aa6e5290ae17796 upstream. Find all the return-thunk sites and record them in a .return_sites section such that the kernel can undo this. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: conflict fixup because of functions added to support IBT] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- tools/objtool/arch/x86/decode.c | 5 ++ tools/objtool/check.c | 75 +++++++++++++++++++++++++ tools/objtool/include/objtool/arch.h | 1 + tools/objtool/include/objtool/elf.h | 1 + tools/objtool/include/objtool/objtool.h | 1 + tools/objtool/objtool.c | 1 + 6 files changed, 84 insertions(+) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 3d8facc65d813a..f62db0e006e9c2 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -722,3 +722,8 @@ bool arch_is_retpoline(struct symbol *sym) { return !strncmp(sym->name, "__x86_indirect_", 15); } + +bool arch_is_rethunk(struct symbol *sym) +{ + return !strcmp(sym->name, "__x86_return_thunk"); +} diff --git a/tools/objtool/check.c b/tools/objtool/check.c index d3e0ec9fd2e0c3..41d111c69620d6 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -654,6 +654,52 @@ static int create_retpoline_sites_sections(struct objtool_file *file) return 0; } +static int create_return_sites_sections(struct objtool_file *file) +{ + struct instruction *insn; + struct section *sec; + int idx; + + sec = find_section_by_name(file->elf, ".return_sites"); + if (sec) { + WARN("file already has .return_sites, skipping"); + return 0; + } + + idx = 0; + list_for_each_entry(insn, &file->return_thunk_list, call_node) + idx++; + + if (!idx) + return 0; + + sec = elf_create_section(file->elf, ".return_sites", 0, + sizeof(int), idx); + if (!sec) { + WARN("elf_create_section: .return_sites"); + return -1; + } + + idx = 0; + list_for_each_entry(insn, &file->return_thunk_list, call_node) { + + int *site = (int *)sec->data->d_buf + idx; + *site = 0; + + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(int), + R_X86_64_PC32, + insn->sec, insn->offset)) { + WARN("elf_add_reloc_to_insn: .return_sites"); + return -1; + } + + idx++; + } + + return 0; +} + static int create_mcount_loc_sections(struct objtool_file *file) { struct section *sec; @@ -932,6 +978,11 @@ __weak bool arch_is_retpoline(struct symbol *sym) return false; } +__weak bool arch_is_rethunk(struct symbol *sym) +{ + return false; +} + #define NEGATIVE_RELOC ((void *)-1L) static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) @@ -1092,6 +1143,19 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in annotate_call_site(file, insn, false); } + +static void add_return_call(struct objtool_file *file, struct instruction *insn) +{ + /* + * Return thunk tail calls are really just returns in disguise, + * so convert them accordingly. + */ + insn->type = INSN_RETURN; + insn->retpoline_safe = true; + + list_add_tail(&insn->call_node, &file->return_thunk_list); +} + /* * Find the destination instructions for all jumps. */ @@ -1116,6 +1180,9 @@ static int add_jump_destinations(struct objtool_file *file) } else if (reloc->sym->retpoline_thunk) { add_retpoline_call(file, insn); continue; + } else if (reloc->sym->return_thunk) { + add_return_call(file, insn); + continue; } else if (insn->func) { /* internal or external sibling call (with reloc) */ add_call_dest(file, insn, reloc->sym, true); @@ -1937,6 +2004,9 @@ static int classify_symbols(struct objtool_file *file) if (arch_is_retpoline(func)) func->retpoline_thunk = true; + if (arch_is_rethunk(func)) + func->return_thunk = true; + if (!strcmp(func->name, "__fentry__")) func->fentry = true; @@ -3413,6 +3483,11 @@ int check(struct objtool_file *file) if (ret < 0) goto out; warnings += ret; + + ret = create_return_sites_sections(file); + if (ret < 0) + goto out; + warnings += ret; } if (mcount) { diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index deae99b423f358..8d57e3d1f763dd 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -88,6 +88,7 @@ const char *arch_ret_insn(int len); int arch_decode_hint_reg(u8 sp_reg, int *base); bool arch_is_retpoline(struct symbol *sym); +bool arch_is_rethunk(struct symbol *sym); int arch_rewrite_retpolines(struct objtool_file *file); diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 08821307ac99b7..6cdfa401b00092 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -57,6 +57,7 @@ struct symbol { u8 uaccess_safe : 1; u8 static_call_tramp : 1; u8 retpoline_thunk : 1; + u8 return_thunk : 1; u8 fentry : 1; u8 kcov : 1; }; diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index 24fa83634de4d7..97b25a217c3a53 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -19,6 +19,7 @@ struct objtool_file { struct list_head insn_list; DECLARE_HASHTABLE(insn_hash, 20); struct list_head retpoline_call_list; + struct list_head return_thunk_list; struct list_head static_call_list; struct list_head mcount_loc_list; bool ignore_unreachables, c_file, hints, rodata; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index e21db8bce4935c..24650d533d85c8 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -126,6 +126,7 @@ struct objtool_file *objtool_open_read(const char *_objname) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); INIT_LIST_HEAD(&file.retpoline_call_list); + INIT_LIST_HEAD(&file.return_thunk_list); INIT_LIST_HEAD(&file.static_call_list); INIT_LIST_HEAD(&file.mcount_loc_list); file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); From b0fb9784cf803472bbdcd3d98364c27966fee679 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 1 Jul 2022 09:00:45 -0300 Subject: [PATCH 0693/1056] objtool: skip non-text sections when adding return-thunk sites The .discard.text section is added in order to reserve BRK, with a temporary function just so it can give it a size. This adds a relocation to the return thunk, which objtool will add to the .return_sites section. Linking will then fail as there are references to the .discard.text section. Do not add instructions from non-text sections to the list of return thunk calls, avoiding the reference to .discard.text. Signed-off-by: Thadeu Lima de Souza Cascardo Acked-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 41d111c69620d6..d80c54809a91de 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1153,7 +1153,9 @@ static void add_return_call(struct objtool_file *file, struct instruction *insn) insn->type = INSN_RETURN; insn->retpoline_safe = true; - list_add_tail(&insn->call_node, &file->return_thunk_list); + /* Skip the non-text sections, specially .discard ones */ + if (insn->sec->text) + list_add_tail(&insn->call_node, &file->return_thunk_list); } /* From c1c80aabc7b968ccae5c134b4e4a30739e32efda Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:39 +0200 Subject: [PATCH 0694/1056] x86,static_call: Use alternative RET encoding commit ee88d363d15617ff50ac24fab0ffec11113b2aeb upstream. In addition to teaching static_call about the new way to spell 'RET', there is an added complication in that static_call() is allowed to rewrite text before it is known which particular spelling is required. In order to deal with this; have a static_call specific fixup in the apply_return() 'alternative' patching routine that will rewrite the static_call trampoline to match the definite sequence. This in turn creates the problem of uniquely identifying static call trampolines. Currently trampolines are 8 bytes, the first 5 being the jmp.d32/ret sequence and the final 3 a byte sequence that spells out 'SCT'. This sequence is used in __static_call_validate() to ensure it is patching a trampoline and not a random other jmp.d32. That is, false-positives shouldn't be plenty, but aren't a big concern. OTOH the new __static_call_fixup() must not have false-positives, and 'SCT' decodes to the somewhat weird but semi plausible sequence: push %rbx rex.XB push %r12 Additionally, there are SLS concerns with immediate jumps. Combined it seems like a good moment to change the signature to a single 3 byte trap instruction that is unique to this usage and will not ever get generated by accident. As such, change the signature to: '0x0f, 0xb9, 0xcc', which decodes to: ud1 %esp, %ecx Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: skip validation as introduced by 2105a92748e8 ("static_call,x86: Robustify trampoline patching")] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/static_call.h | 17 +++++++++++++ arch/x86/kernel/alternative.c | 12 ++++++---- arch/x86/kernel/static_call.c | 38 +++++++++++++++++++++++++++++- 3 files changed, 62 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 34323456939274..7efce22d635523 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -21,6 +21,16 @@ * relative displacement across sections. */ +/* + * The trampoline is 8 bytes and of the general form: + * + * jmp.d32 \func + * ud1 %esp, %ecx + * + * That trailing #UD provides both a speculation stop and serves as a unique + * 3 byte signature identifying static call trampolines. Also see tramp_ud[] + * and __static_call_fixup(). + */ #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \ asm(".pushsection .static_call.text, \"ax\" \n" \ ".align 4 \n" \ @@ -34,8 +44,13 @@ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") +#ifdef CONFIG_RETPOLINE +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") +#else #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") +#endif #define ARCH_ADD_TRAMP_KEY(name) \ @@ -44,4 +59,6 @@ ".long " STATIC_CALL_KEY_STR(name) " - . \n" \ ".popsection \n") +extern bool __static_call_fixup(void *tramp, u8 op, void *dest); + #endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index aa0cadfba2556a..db475542d78b44 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -538,18 +538,22 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) s32 *s; for (s = start; s < end; s++) { - void *addr = (void *)s + *s; + void *dest = NULL, *addr = (void *)s + *s; struct insn insn; int len, ret; u8 bytes[16]; - u8 op1; + u8 op; ret = insn_decode_kernel(&insn, addr); if (WARN_ON_ONCE(ret < 0)) continue; - op1 = insn.opcode.bytes[0]; - if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE)) + op = insn.opcode.bytes[0]; + if (op == JMP32_INSN_OPCODE) + dest = addr + insn.length + insn.immediate.value; + + if (__static_call_fixup(addr, op, dest) || + WARN_ON_ONCE(dest != &__x86_return_thunk)) continue; DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 3ec2cb881eef02..2425bea07cdc27 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -11,6 +11,13 @@ enum insn_type { RET = 3, /* tramp / site cond-tail-call */ }; +/* + * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such + * that there is no false-positive trampoline identification while also being a + * speculation stop. + */ +static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc }; + /* * cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax */ @@ -43,7 +50,10 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void break; case RET: - code = &retinsn; + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); + else + code = &retinsn; break; } @@ -109,3 +119,29 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) mutex_unlock(&text_mutex); } EXPORT_SYMBOL_GPL(arch_static_call_transform); + +#ifdef CONFIG_RETPOLINE +/* + * This is called by apply_returns() to fix up static call trampolines, + * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as + * having a return trampoline. + * + * The problem is that static_call() is available before determining + * X86_FEATURE_RETHUNK and, by implication, running alternatives. + * + * This means that __static_call_transform() above can have overwritten the + * return trampoline and we now need to fix things up to be consistent. + */ +bool __static_call_fixup(void *tramp, u8 op, void *dest) +{ + if (memcmp(tramp+5, tramp_ud, 3)) { + /* Not a trampoline site, not our problem. */ + return false; + } + + if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) + __static_call_transform(tramp, RET, NULL); + + return true; +} +#endif From e54fcb0812faebd147de72bd37ad87cc4951c68c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:40 +0200 Subject: [PATCH 0695/1056] x86/ftrace: Use alternative RET encoding commit 1f001e9da6bbf482311e45e48f53c2bd2179e59c upstream. Use the return thunk in ftrace trampolines, if needed. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: still copy return from ftrace_stub] [cascardo: use memcpy(text_gen_insn) as there is no __text_gen_insn] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/ftrace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 847776cc1aa40c..74c2f88a43d0cc 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -308,7 +308,7 @@ union ftrace_op_code_union { } __attribute__((packed)); }; -#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS) +#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS)) static unsigned long create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) @@ -367,7 +367,10 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* The trampoline ends with ret(q) */ retq = (unsigned long)ftrace_stub; - ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + memcpy(ip, text_gen_insn(JMP32_INSN_OPCODE, ip, &__x86_return_thunk), JMP32_INSN_SIZE); + else + ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); if (WARN_ON(ret < 0)) goto fail; From 0bfe8be824aadc24da2963978a6c6e743f1e4f46 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:41 +0200 Subject: [PATCH 0696/1056] x86/bpf: Use alternative RET encoding commit d77cfe594ad50e0bf95d457e02ccd578791b2a15 upstream. Use the return thunk in eBPF generated code, if needed. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/net/bpf_jit_comp.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index c9045985183c08..131f7ceb54dc60 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -406,6 +406,21 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) *pprog = prog; } +static void emit_return(u8 **pprog, u8 *ip) +{ + u8 *prog = *pprog; + + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + emit_jump(&prog, &__x86_return_thunk, ip); + } else { + EMIT1(0xC3); /* ret */ + if (IS_ENABLED(CONFIG_SLS)) + EMIT1(0xCC); /* int3 */ + } + + *pprog = prog; +} + /* * Generate the following code: * @@ -1681,7 +1696,7 @@ st: if (is_imm8(insn->off)) ctx->cleanup_addr = proglen; pop_callee_regs(&prog, callee_regs_used); EMIT1(0xC9); /* leave */ - EMIT1(0xC3); /* ret */ + emit_return(&prog, image + addrs[i - 1] + (prog - temp)); break; default: @@ -2127,7 +2142,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_SKIP_FRAME) /* skip our return address and return to parent */ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ - EMIT1(0xC3); /* ret */ + emit_return(&prog, prog); /* Make sure the trampoline generation logic doesn't overflow */ if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { ret = -EFAULT; From 5b43965d58efb6ef15cd7988cb15116fb154ad54 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:42 +0200 Subject: [PATCH 0697/1056] x86/kvm: Fix SETcc emulation for return thunks commit af2e140f34208a5dfb6b7a8ad2d56bda88f0524d upstream. Prepare the SETcc fastop stuff for when RET can be larger still. The tricky bit here is that the expressions should not only be constant C expressions, but also absolute GAS expressions. This means no ?: and 'true' is ~0. Also ensure em_setcc() has the same alignment as the actual FOP_SETCC() ops, this ensures there cannot be an alignment hole between em_setcc() and the first op. Additionally, add a .skip directive to the FOP_SETCC() macro to fill any remaining space with INT3 traps; however the primary purpose of this directive is to generate AS warnings when the remaining space goes negative. Which is a very good indication the alignment magic went side-ways. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: ignore ENDBR when computing SETCC_LENGTH] [cascardo: conflict fixup] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 82eff14bd064d1..d7c4ba7217f679 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -321,13 +321,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); #define FOP_RET(name) \ __FOP_RET(#name) -#define FOP_START(op) \ +#define __FOP_START(op, align) \ extern void em_##op(struct fastop *fake); \ asm(".pushsection .text, \"ax\" \n\t" \ ".global em_" #op " \n\t" \ - ".align " __stringify(FASTOP_SIZE) " \n\t" \ + ".align " __stringify(align) " \n\t" \ "em_" #op ":\n\t" +#define FOP_START(op) __FOP_START(op, FASTOP_SIZE) + #define FOP_END \ ".popsection") @@ -431,15 +433,14 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); /* * Depending on .config the SETcc functions look like: * - * SETcc %al [3 bytes] - * RET [1 byte] - * INT3 [1 byte; CONFIG_SLS] - * - * Which gives possible sizes 4 or 5. When rounded up to the - * next power-of-two alignment they become 4 or 8. + * SETcc %al [3 bytes] + * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETPOLINE] + * INT3 [1 byte; CONFIG_SLS] */ -#define SETCC_LENGTH (4 + IS_ENABLED(CONFIG_SLS)) -#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS)) +#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \ + IS_ENABLED(CONFIG_SLS)) +#define SETCC_LENGTH (3 + RET_LENGTH) +#define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) static_assert(SETCC_LENGTH <= SETCC_ALIGN); #define FOP_SETCC(op) \ @@ -447,13 +448,14 @@ static_assert(SETCC_LENGTH <= SETCC_ALIGN); ".type " #op ", @function \n\t" \ #op ": \n\t" \ #op " %al \n\t" \ - __FOP_RET(#op) + __FOP_RET(#op) \ + ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t" asm(".pushsection .fixup, \"ax\"\n" "kvm_fastop_exception: xor %esi, %esi; " ASM_RET ".popsection"); -FOP_START(setcc) +__FOP_START(setcc, SETCC_ALIGN) FOP_SETCC(seto) FOP_SETCC(setno) FOP_SETCC(setc) From f3d9f990586c8f85ed2ced627f15801d39a7ed87 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:43 +0200 Subject: [PATCH 0698/1056] x86/vsyscall_emu/64: Don't use RET in vsyscall emulation commit 15583e514eb16744b80be85dea0774ece153177d upstream. This is userspace code and doesn't play by the normal kernel rules. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/vsyscall/vsyscall_emu_64.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S index 15e35159ebb68d..ef2dd182724314 100644 --- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S +++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S @@ -19,17 +19,20 @@ __vsyscall_page: mov $__NR_gettimeofday, %rax syscall - RET + ret + int3 .balign 1024, 0xcc mov $__NR_time, %rax syscall - RET + ret + int3 .balign 1024, 0xcc mov $__NR_getcpu, %rax syscall - RET + ret + int3 .balign 4096, 0xcc From 6c45176bc2ae4c3c2417202903f19cd86b9d7651 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 14 Jun 2022 23:15:44 +0200 Subject: [PATCH 0699/1056] x86/sev: Avoid using __x86_return_thunk commit 0ee9073000e8791f8b134a8ded31bcc767f7f232 upstream. Specifically, it's because __enc_copy() encrypts the kernel after being relocated outside the kernel in sme_encrypt_execute(), and the RET macro's jmp offset isn't amended prior to execution. Signed-off-by: Kim Phillips Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/mem_encrypt_boot.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 3d1dba05fce4ae..d94dea450fa60b 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -65,7 +65,9 @@ SYM_FUNC_START(sme_encrypt_execute) movq %rbp, %rsp /* Restore original stack pointer */ pop %rbp - RET + /* Offset to __x86_return_thunk would be wrong here */ + ret + int3 SYM_FUNC_END(sme_encrypt_execute) SYM_FUNC_START(__enc_copy) @@ -151,6 +153,8 @@ SYM_FUNC_START(__enc_copy) pop %r12 pop %r15 - RET + /* Offset to __x86_return_thunk would be wrong here */ + ret + int3 .L__enc_copy_end: SYM_FUNC_END(__enc_copy) From 1d61a2988612ac0632134454d5407c63ae0b9d42 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:45 +0200 Subject: [PATCH 0700/1056] x86: Use return-thunk in asm code commit aa3d480315ba6c3025a60958e1981072ea37c3df upstream. Use the return thunk in asm code. If the thunk isn't needed, it will get patched into a RET instruction during boot by apply_returns(). Since alternatives can't handle relocations outside of the first instruction, putting a 'jmp __x86_return_thunk' in one is not valid, therefore carve out the memmove ERMS path into a separate label and jump to it. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: no RANDSTRUCT_CFLAGS] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/vdso/Makefile | 1 + arch/x86/include/asm/linkage.h | 8 ++++++++ arch/x86/lib/memmove_64.S | 7 ++++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index a2dddcc189f692..c8891d3b38d303 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -92,6 +92,7 @@ endif endif $(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) +$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 030907922bd07e..d04e61c2f863c5 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -18,19 +18,27 @@ #define __ALIGN_STR __stringify(__ALIGN) #endif +#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define RET jmp __x86_return_thunk +#else /* CONFIG_RETPOLINE */ #ifdef CONFIG_SLS #define RET ret; int3 #else #define RET ret #endif +#endif /* CONFIG_RETPOLINE */ #else /* __ASSEMBLY__ */ +#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define ASM_RET "jmp __x86_return_thunk\n\t" +#else /* CONFIG_RETPOLINE */ #ifdef CONFIG_SLS #define ASM_RET "ret; int3\n\t" #else #define ASM_RET "ret\n\t" #endif +#endif /* CONFIG_RETPOLINE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 50ea390df7128d..4b8ee3a2fcc372 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -40,7 +40,7 @@ SYM_FUNC_START(__memmove) /* FSRM implies ERMS => no length checks, do the copy directly */ .Lmemmove_begin_forward: ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM - ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS + ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS /* * movsq instruction have many startup latency @@ -206,6 +206,11 @@ SYM_FUNC_START(__memmove) movb %r11b, (%rdi) 13: RET + +.Lmemmove_erms: + movq %rdx, %rcx + rep movsb + RET SYM_FUNC_END(__memmove) SYM_FUNC_END_ALIAS(memmove) EXPORT_SYMBOL(__memmove) From 1f068f9da7436b11276f23ebf65dd641c8ae0b62 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:46 +0200 Subject: [PATCH 0701/1056] x86/entry: Avoid very early RET commit 7c81c0c9210c9bfab2bae76aab2999de5bad27db upstream. Commit ee774dac0da1 ("x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()") manages to introduce a CALL/RET pair that is before SWITCH_TO_KERNEL_CR3, which means it is before RETBleed can be mitigated. Revert to an earlier version of the commit in Fixes. Down side is that this will bloat .text size somewhat. The alternative is fully reverting it. The purpose of this patch was to allow migrating error_entry() to C, including the whole of kPTI. Much care needs to be taken moving that forward to not re-introduce this problem of early RETs. Fixes: ee774dac0da1 ("x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 18c57d829b598d..a63d097ca0d8f8 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -314,14 +314,6 @@ SYM_CODE_END(ret_from_fork) #endif .endm -/* Save all registers in pt_regs */ -SYM_CODE_START_LOCAL(push_and_clear_regs) - UNWIND_HINT_FUNC - PUSH_AND_CLEAR_REGS save_ret=1 - ENCODE_FRAME_POINTER 8 - RET -SYM_CODE_END(push_and_clear_regs) - /** * idtentry_body - Macro to emit code calling the C function * @cfunc: C function to be called @@ -329,8 +321,8 @@ SYM_CODE_END(push_and_clear_regs) */ .macro idtentry_body cfunc has_error_code:req - call push_and_clear_regs - UNWIND_HINT_REGS + PUSH_AND_CLEAR_REGS + ENCODE_FRAME_POINTER /* * Call error_entry() and switch to the task stack if from userspace. From 07f5c5e36236368ca761868ed82c8b654f58968a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:47 +0200 Subject: [PATCH 0702/1056] objtool: Treat .text.__x86.* as noinstr commit 951ddecf435659553ed15a9214e153a3af43a9a1 upstream. Needed because zen_untrain_ret() will be called from noinstr code. Also makes sense since the thunks MUST NOT contain instrumentation nor be poked with dynamic instrumentation. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index d80c54809a91de..a955cf4cf9328f 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -367,7 +367,8 @@ static int decode_instructions(struct objtool_file *file) sec->text = true; if (!strcmp(sec->name, ".noinstr.text") || - !strcmp(sec->name, ".entry.text")) + !strcmp(sec->name, ".entry.text") || + !strncmp(sec->name, ".text.__x86.", 12)) sec->noinstr = true; for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) { From a9c0926fc75468ccea0cf1f3b48b0d5a41291975 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:48 +0200 Subject: [PATCH 0703/1056] x86: Add magic AMD return-thunk commit a149180fbcf336e97ce4eb2cdc13672727feb94d upstream. Note: needs to be in a section distinct from Retpolines such that the Retpoline RET substitution cannot possibly use immediate jumps. ORC unwinding for zen_untrain_ret() and __x86_return_thunk() is a little tricky but works due to the fact that zen_untrain_ret() doesn't have any stack ops and as such will emit a single ORC entry at the start (+0x3f). Meanwhile, unwinding an IP, including the __x86_return_thunk() one (+0x40) will search for the largest ORC entry smaller or equal to the IP, these will find the one ORC entry (+0x3f) and all works. [ Alexandre: SVM part. ] [ bp: Build fix, massages. ] Suggested-by: Andrew Cooper Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: conflicts at arch/x86/entry/entry_64_compat.S] [cascardo: there is no ANNOTATE_NOENDBR] [cascardo: objtool commit 34c861e806478ac2ea4032721defbf1d6967df08 missing] [cascardo: conflict fixup] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 6 +++ arch/x86/entry/entry_64_compat.S | 4 ++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/disabled-features.h | 3 +- arch/x86/include/asm/nospec-branch.h | 17 +++++++ arch/x86/kernel/vmlinux.lds.S | 2 +- arch/x86/kvm/svm/vmenter.S | 18 +++++++ arch/x86/lib/retpoline.S | 63 ++++++++++++++++++++++-- tools/objtool/check.c | 20 ++++++-- 9 files changed, 126 insertions(+), 8 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a63d097ca0d8f8..cf08173d209ab4 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -94,6 +94,7 @@ SYM_CODE_START(entry_SYSCALL_64) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) + UNTRAIN_RET /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ @@ -688,6 +689,7 @@ native_irq_return_ldt: pushq %rdi /* Stash user RDI */ swapgs /* to kernel GS */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ + UNTRAIN_RET movq PER_CPU_VAR(espfix_waddr), %rdi movq %rax, (0*8)(%rdi) /* user RAX */ @@ -882,6 +884,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) * be retrieved from a kernel internal table. */ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 + UNTRAIN_RET /* * Handling GSBASE depends on the availability of FSGSBASE. @@ -992,6 +995,7 @@ SYM_CODE_START_LOCAL(error_entry) FENCE_SWAPGS_USER_ENTRY /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + UNTRAIN_RET leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ .Lerror_entry_from_usermode_after_swapgs: @@ -1044,6 +1048,7 @@ SYM_CODE_START_LOCAL(error_entry) SWAPGS FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + UNTRAIN_RET /* * Pretend that the exception came from user mode: set up pt_regs @@ -1138,6 +1143,7 @@ SYM_CODE_START(asm_exc_nmi) movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp UNWIND_HINT_IRET_REGS base=%rdx offset=8 + UNTRAIN_RET pushq 5*8(%rdx) /* pt_regs->ss */ pushq 4*8(%rdx) /* pt_regs->rsp */ pushq 3*8(%rdx) /* pt_regs->flags */ diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 0051cf5c792d1a..007f3a1fbf1a2c 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -71,6 +72,7 @@ SYM_CODE_START(entry_SYSENTER_compat) pushq $__USER32_CS /* pt_regs->cs */ pushq $0 /* pt_regs->ip = 0 (placeholder) */ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) + UNTRAIN_RET /* * User tracing code (ptrace or signal handlers) might assume that @@ -211,6 +213,7 @@ SYM_CODE_START(entry_SYSCALL_compat) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) + UNTRAIN_RET /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ @@ -377,6 +380,7 @@ SYM_CODE_START(entry_INT80_compat) pushq (%rdi) /* pt_regs->di */ .Lint80_keep_stack: + UNTRAIN_RET pushq %rsi /* pt_regs->si */ xorl %esi, %esi /* nospec si */ pushq %rdx /* pt_regs->dx */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b1fd846f121edf..b7218d54b53fa7 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -299,6 +299,7 @@ #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index d7c6e648ff3892..1905ac5cd39628 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -61,7 +61,8 @@ #else # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ - (1 << (X86_FEATURE_RETHUNK & 31))) + (1 << (X86_FEATURE_RETHUNK & 31)) | \ + (1 << (X86_FEATURE_UNRET & 31))) #endif /* Force disable because it's broken beyond repair */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 2b0d71f00c29c7..3e61777ffe2464 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -112,6 +112,22 @@ #endif .endm +/* + * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the + * return thunk isn't mapped into the userspace tables (then again, AMD + * typically has NO_MELTDOWN). + * + * Doesn't clobber any registers but does require a stable stack. + * + * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point + * where we have a stack but before any RET instruction. + */ +.macro UNTRAIN_RET +#ifdef CONFIG_RETPOLINE + ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET +#endif +.endm + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -121,6 +137,7 @@ ".popsection\n\t" extern void __x86_return_thunk(void); +extern void zen_untrain_ret(void); #ifdef CONFIG_RETPOLINE diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index a99d016df56f6a..c1efcd194ad7bf 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -142,7 +142,7 @@ SECTIONS #ifdef CONFIG_RETPOLINE __indirect_thunk_start = .; - *(.text.__x86.indirect_thunk) + *(.text.__x86.*) __indirect_thunk_end = .; #endif } :text =0xcccc diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index dfaeb47fcf2a75..723f8534986c31 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -110,6 +110,15 @@ SYM_FUNC_START(__svm_vcpu_run) mov %r15, VCPU_R15(%_ASM_AX) #endif + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be + * untrained as soon as we exit the VM and are back to the + * kernel. This should be done before re-enabling interrupts + * because interrupt handlers won't sanitize 'ret' if the return is + * from the kernel. + */ + UNTRAIN_RET + /* * Clear all general purpose registers except RSP and RAX to prevent * speculative use of the guest's values, even those that are reloaded @@ -190,6 +199,15 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be + * untrained as soon as we exit the VM and are back to the + * kernel. This should be done before re-enabling interrupts + * because interrupt handlers won't sanitize RET if the return is + * from the kernel. + */ + UNTRAIN_RET + pop %_ASM_BX #ifdef CONFIG_X86_64 diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 01667ea9da0204..807f674fd59e5c 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -71,10 +71,67 @@ SYM_CODE_END(__x86_indirect_thunk_array) * This function name is magical and is used by -mfunction-return=thunk-extern * for the compiler to generate JMPs to it. */ -SYM_CODE_START(__x86_return_thunk) - UNWIND_HINT_EMPTY + .section .text.__x86.return_thunk + +/* + * Safety details here pertain to the AMD Zen{1,2} microarchitecture: + * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for + * alignment within the BTB. + * 2) The instruction at zen_untrain_ret must contain, and not + * end with, the 0xc3 byte of the RET. + * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread + * from re-poisioning the BTB prediction. + */ + .align 64 + .skip 63, 0xcc +SYM_FUNC_START_NOALIGN(zen_untrain_ret); + + /* + * As executed from zen_untrain_ret, this is: + * + * TEST $0xcc, %bl + * LFENCE + * JMP __x86_return_thunk + * + * Executing the TEST instruction has a side effect of evicting any BTB + * prediction (potentially attacker controlled) attached to the RET, as + * __x86_return_thunk + 1 isn't an instruction boundary at the moment. + */ + .byte 0xf6 + + /* + * As executed from __x86_return_thunk, this is a plain RET. + * + * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. + * + * We subsequently jump backwards and architecturally execute the RET. + * This creates a correct BTB prediction (type=ret), but in the + * meantime we suffer Straight Line Speculation (because the type was + * no branch) which is halted by the INT3. + * + * With SMT enabled and STIBP active, a sibling thread cannot poison + * RET's prediction to a type of its choice, but can evict the + * prediction due to competitive sharing. If the prediction is + * evicted, __x86_return_thunk will suffer Straight Line Speculation + * which will be contained safely by the INT3. + */ +SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL) ret int3 SYM_CODE_END(__x86_return_thunk) -__EXPORT_THUNK(__x86_return_thunk) + /* + * Ensure the TEST decoding / BTB invalidation is complete. + */ + lfence + + /* + * Jump back and execute the RET in the middle of the TEST instruction. + * INT3 is for SLS protection. + */ + jmp __x86_return_thunk + int3 +SYM_FUNC_END(zen_untrain_ret) +__EXPORT_THUNK(zen_untrain_ret) + +EXPORT_SYMBOL(__x86_return_thunk) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a955cf4cf9328f..a90f0d65771600 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1145,7 +1145,7 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in annotate_call_site(file, insn, false); } -static void add_return_call(struct objtool_file *file, struct instruction *insn) +static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add) { /* * Return thunk tail calls are really just returns in disguise, @@ -1155,7 +1155,7 @@ static void add_return_call(struct objtool_file *file, struct instruction *insn) insn->retpoline_safe = true; /* Skip the non-text sections, specially .discard ones */ - if (insn->sec->text) + if (add && insn->sec->text) list_add_tail(&insn->call_node, &file->return_thunk_list); } @@ -1184,7 +1184,7 @@ static int add_jump_destinations(struct objtool_file *file) add_retpoline_call(file, insn); continue; } else if (reloc->sym->return_thunk) { - add_return_call(file, insn); + add_return_call(file, insn, true); continue; } else if (insn->func) { /* internal or external sibling call (with reloc) */ @@ -1201,6 +1201,7 @@ static int add_jump_destinations(struct objtool_file *file) insn->jump_dest = find_insn(file, dest_sec, dest_off); if (!insn->jump_dest) { + struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off); /* * This is a special case where an alt instruction @@ -1210,6 +1211,19 @@ static int add_jump_destinations(struct objtool_file *file) if (!strcmp(insn->sec->name, ".altinstr_replacement")) continue; + /* + * This is a special case for zen_untrain_ret(). + * It jumps to __x86_return_thunk(), but objtool + * can't find the thunk's starting RET + * instruction, because the RET is also in the + * middle of another instruction. Objtool only + * knows about the outer instruction. + */ + if (sym && sym->return_thunk) { + add_return_call(file, insn, false); + continue; + } + WARN_FUNC("can't find jump dest instruction at %s+0x%lx", insn->sec, insn->offset, dest_sec->name, dest_off); From 82e92fe936dbefda25ed030abe91ae76576cd6da Mon Sep 17 00:00:00 2001 From: Alexandre Chartre Date: Tue, 14 Jun 2022 23:15:49 +0200 Subject: [PATCH 0704/1056] x86/bugs: Report AMD retbleed vulnerability commit 6b80b59b3555706508008f1f127b5412c89c7fd8 upstream. Report that AMD x86 CPUs are vulnerable to the RETBleed (Arbitrary Speculative Code Execution with Return Instructions) attack. [peterz: add hygon] [kim: invert parity; fam15h] Co-developed-by: Kim Phillips Signed-off-by: Kim Phillips Signed-off-by: Alexandre Chartre Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 13 +++++++++++++ arch/x86/kernel/cpu/common.c | 19 +++++++++++++++++++ drivers/base/cpu.c | 8 ++++++++ include/linux/cpu.h | 2 ++ 5 files changed, 43 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b7218d54b53fa7..7f1fb9ad66b7a1 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -443,5 +443,6 @@ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2c512ff2ee102c..c62ea37ca38ab3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1987,6 +1987,11 @@ static ssize_t srbds_show_state(char *buf) return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); } +static ssize_t retbleed_show_state(char *buf) +{ + return sprintf(buf, "Vulnerable\n"); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -2032,6 +2037,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_MMIO_STALE_DATA: return mmio_stale_data_show_state(buf); + case X86_BUG_RETBLEED: + return retbleed_show_state(buf); + default: break; } @@ -2088,4 +2096,9 @@ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *at { return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); } + +ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index bd390c56e55132..d5488cff3e011f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1095,16 +1095,27 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { {} }; +#define VULNBL(vendor, family, model, blacklist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) + #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ INTEL_FAM6_##model, steppings, \ X86_FEATURE_ANY, issues) +#define VULNBL_AMD(family, blacklist) \ + VULNBL(AMD, family, X86_MODEL_ANY, blacklist) + +#define VULNBL_HYGON(family, blacklist) \ + VULNBL(HYGON, family, X86_MODEL_ANY, blacklist) + #define SRBDS BIT(0) /* CPU is affected by X86_BUG_MMIO_STALE_DATA */ #define MMIO BIT(1) /* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ #define MMIO_SBDS BIT(2) +/* CPU is affected by RETbleed, speculating where you would not expect it */ +#define RETBLEED BIT(3) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1137,6 +1148,11 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), + + VULNBL_AMD(0x15, RETBLEED), + VULNBL_AMD(0x16, RETBLEED), + VULNBL_AMD(0x17, RETBLEED), + VULNBL_HYGON(0x18, RETBLEED), {} }; @@ -1238,6 +1254,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !arch_cap_mmio_immune(ia32_cap)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + if (cpu_matches(cpu_vuln_blacklist, RETBLEED)) + setup_force_cpu_bug(X86_BUG_RETBLEED); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index fddd08e482fcf8..55405ebf23abfd 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -570,6 +570,12 @@ ssize_t __weak cpu_show_mmio_stale_data(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_retbleed(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -580,6 +586,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); +static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -592,6 +599,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_itlb_multihit.attr, &dev_attr_srbds.attr, &dev_attr_mmio_stale_data.attr, + &dev_attr_retbleed.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index eb3394f5b1092b..6102a21a01d9a4 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, extern ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_retbleed(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, From 89eba42632fcfff30e84f66a5a7ca63882ac64ea Mon Sep 17 00:00:00 2001 From: Alexandre Chartre Date: Tue, 14 Jun 2022 23:15:50 +0200 Subject: [PATCH 0705/1056] x86/bugs: Add AMD retbleed= boot parameter commit 7fbf47c7ce50b38a64576b150e7011ae73d54669 upstream. Add the "retbleed=" boot parameter to select a mitigation for RETBleed. Possible values are "off", "auto" and "unret" (JMP2RET mitigation). The default value is "auto". Currently, "retbleed=auto" will select the unret mitigation on AMD and Hygon and no mitigation on Intel (JMP2RET is not effective on Intel). [peterz: rebase; add hygon] [jpoimboe: cleanups] Signed-off-by: Alexandre Chartre Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 15 +++ arch/x86/Kconfig | 3 + arch/x86/kernel/cpu/bugs.c | 108 +++++++++++++++++- 3 files changed, 125 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9f4f3e2ceea60a..961843aa840324 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4968,6 +4968,21 @@ retain_initrd [RAM] Keep initrd memory after extraction + retbleed= [X86] Control mitigation of RETBleed (Arbitrary + Speculative Code Execution with Return Instructions) + vulnerability. + + off - unconditionally disable + auto - automatically select a migitation + unret - force enable untrained return thunks, + only effective on AMD Zen {1,2} + based systems. + + Selecting 'auto' will choose a mitigation method at run + time according to the CPU. + + Not specifying this option is equivalent to retbleed=auto. + rfkill.default_state= 0 "airplane mode". All wifi, bluetooth, wimax, gps, fm, etc. communication is blocked by default. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1d0f16b5339375..babe46b4b85f6f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -471,6 +471,9 @@ config RETPOLINE config CC_HAS_SLS def_bool $(cc-option,-mharden-sls=all) +config CC_HAS_RETURN_THUNK + def_bool $(cc-option,-mfunction-return=thunk-extern) + config SLS bool "Mitigate Straight-Line-Speculation" depends on CC_HAS_SLS && X86_64 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c62ea37ca38ab3..3e01a88f7d5dad 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -37,6 +37,7 @@ #include "cpu.h" static void __init spectre_v1_select_mitigation(void); +static void __init retbleed_select_mitigation(void); static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); @@ -120,6 +121,12 @@ void __init check_bugs(void) /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); + retbleed_select_mitigation(); + /* + * spectre_v2_select_mitigation() relies on the state set by + * retbleed_select_mitigation(); specifically the STIBP selection is + * forced for UNRET. + */ spectre_v2_select_mitigation(); ssb_select_mitigation(); l1tf_select_mitigation(); @@ -745,6 +752,100 @@ static int __init nospectre_v1_cmdline(char *str) } early_param("nospectre_v1", nospectre_v1_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "RETBleed: " fmt + +enum retbleed_mitigation { + RETBLEED_MITIGATION_NONE, + RETBLEED_MITIGATION_UNRET, +}; + +enum retbleed_mitigation_cmd { + RETBLEED_CMD_OFF, + RETBLEED_CMD_AUTO, + RETBLEED_CMD_UNRET, +}; + +const char * const retbleed_strings[] = { + [RETBLEED_MITIGATION_NONE] = "Vulnerable", + [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", +}; + +static enum retbleed_mitigation retbleed_mitigation __ro_after_init = + RETBLEED_MITIGATION_NONE; +static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = + RETBLEED_CMD_AUTO; + +static int __init retbleed_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + retbleed_cmd = RETBLEED_CMD_OFF; + else if (!strcmp(str, "auto")) + retbleed_cmd = RETBLEED_CMD_AUTO; + else if (!strcmp(str, "unret")) + retbleed_cmd = RETBLEED_CMD_UNRET; + else + pr_err("Unknown retbleed option (%s). Defaulting to 'auto'\n", str); + + return 0; +} +early_param("retbleed", retbleed_parse_cmdline); + +#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" +#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" + +static void __init retbleed_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) + return; + + switch (retbleed_cmd) { + case RETBLEED_CMD_OFF: + return; + + case RETBLEED_CMD_UNRET: + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + break; + + case RETBLEED_CMD_AUTO: + default: + if (!boot_cpu_has_bug(X86_BUG_RETBLEED)) + break; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + break; + } + + switch (retbleed_mitigation) { + case RETBLEED_MITIGATION_UNRET: + + if (!IS_ENABLED(CONFIG_RETPOLINE) || + !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) { + pr_err(RETBLEED_COMPILER_MSG); + retbleed_mitigation = RETBLEED_MITIGATION_NONE; + break; + } + + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + pr_err(RETBLEED_UNTRAIN_MSG); + break; + + default: + break; + } + + pr_info("%s\n", retbleed_strings[retbleed_mitigation]); +} + #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt @@ -1989,7 +2090,12 @@ static ssize_t srbds_show_state(char *buf) static ssize_t retbleed_show_state(char *buf) { - return sprintf(buf, "Vulnerable\n"); + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET && + (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)) + return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); + + return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); } static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, From b4e05ea71edaf52f2ecef675407a82160176ac54 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 14 Jun 2022 23:15:51 +0200 Subject: [PATCH 0706/1056] x86/bugs: Enable STIBP for JMP2RET commit e8ec1b6e08a2102d8755ccb06fa26d540f26a2fa upstream. For untrained return thunks to be fully effective, STIBP must be enabled or SMT disabled. Co-developed-by: Josh Poimboeuf Signed-off-by: Josh Poimboeuf Signed-off-by: Kim Phillips Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 16 +++-- arch/x86/kernel/cpu/bugs.c | 58 +++++++++++++++---- 2 files changed, 57 insertions(+), 17 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 961843aa840324..998420529fbc01 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4972,11 +4972,17 @@ Speculative Code Execution with Return Instructions) vulnerability. - off - unconditionally disable - auto - automatically select a migitation - unret - force enable untrained return thunks, - only effective on AMD Zen {1,2} - based systems. + off - no mitigation + auto - automatically select a migitation + auto,nosmt - automatically select a mitigation, + disabling SMT if necessary for + the full mitigation (only on Zen1 + and older without STIBP). + unret - force enable untrained return thunks, + only effective on AMD f15h-f17h + based systems. + unret,nosmt - like unret, will disable SMT when STIBP + is not available. Selecting 'auto' will choose a mitigation method at run time according to the CPU. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3e01a88f7d5dad..27bb40a718ae25 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -776,19 +776,34 @@ static enum retbleed_mitigation retbleed_mitigation __ro_after_init = static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = RETBLEED_CMD_AUTO; +static int __ro_after_init retbleed_nosmt = false; + static int __init retbleed_parse_cmdline(char *str) { if (!str) return -EINVAL; - if (!strcmp(str, "off")) - retbleed_cmd = RETBLEED_CMD_OFF; - else if (!strcmp(str, "auto")) - retbleed_cmd = RETBLEED_CMD_AUTO; - else if (!strcmp(str, "unret")) - retbleed_cmd = RETBLEED_CMD_UNRET; - else - pr_err("Unknown retbleed option (%s). Defaulting to 'auto'\n", str); + while (str) { + char *next = strchr(str, ','); + if (next) { + *next = 0; + next++; + } + + if (!strcmp(str, "off")) { + retbleed_cmd = RETBLEED_CMD_OFF; + } else if (!strcmp(str, "auto")) { + retbleed_cmd = RETBLEED_CMD_AUTO; + } else if (!strcmp(str, "unret")) { + retbleed_cmd = RETBLEED_CMD_UNRET; + } else if (!strcmp(str, "nosmt")) { + retbleed_nosmt = true; + } else { + pr_err("Ignoring unknown retbleed option (%s).", str); + } + + str = next; + } return 0; } @@ -834,6 +849,10 @@ static void __init retbleed_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_UNRET); + if (!boot_cpu_has(X86_FEATURE_STIBP) && + (retbleed_nosmt || cpu_mitigations_auto_nosmt())) + cpu_smt_disable(false); + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) pr_err(RETBLEED_UNTRAIN_MSG); @@ -1080,6 +1099,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) mode = SPECTRE_V2_USER_STRICT_PREFERRED; + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { + if (mode != SPECTRE_V2_USER_STRICT && + mode != SPECTRE_V2_USER_STRICT_PREFERRED) + pr_info("Selecting STIBP always-on mode to complement retbleed mitigation'\n"); + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + } + spectre_v2_user_stibp = mode; set_mode: @@ -2090,10 +2116,18 @@ static ssize_t srbds_show_state(char *buf) static ssize_t retbleed_show_state(char *buf) { - if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET && - (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)) - return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); + + return sprintf(buf, "%s; SMT %s\n", + retbleed_strings[retbleed_mitigation], + !sched_smt_active() ? "disabled" : + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? + "enabled with STIBP protection" : "vulnerable"); + } return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); } From fac1b0007cec8335f47b01358f352a9bdd72f7fb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:52 +0200 Subject: [PATCH 0707/1056] x86/bugs: Keep a per-CPU IA32_SPEC_CTRL value commit caa0ff24d5d0e02abce5e65c3d2b7f20a6617be5 upstream. Due to TIF_SSBD and TIF_SPEC_IB the actual IA32_SPEC_CTRL value can differ from x86_spec_ctrl_base. As such, keep a per-CPU value reflecting the current task's MSR content. [jpoimboe: rename] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 28 +++++++++++++++++++++++----- arch/x86/kernel/process.c | 2 +- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 3e61777ffe2464..8b34e34b56236b 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -254,6 +254,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; +extern void write_spec_ctrl_current(u64 val); /* * With retpoline, we must use IBRS to restrict branch prediction diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 27bb40a718ae25..eae8ff8067fa25 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -49,11 +49,29 @@ static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); static void __init l1d_flush_select_mitigation(void); -/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ +/* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); + +/* The current value of the SPEC_CTRL MSR with task-specific bits set */ +DEFINE_PER_CPU(u64, x86_spec_ctrl_current); +EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); + static DEFINE_MUTEX(spec_ctrl_mutex); +/* + * Keep track of the SPEC_CTRL MSR value for the current task, which may differ + * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). + */ +void write_spec_ctrl_current(u64 val) +{ + if (this_cpu_read(x86_spec_ctrl_current) == val) + return; + + this_cpu_write(x86_spec_ctrl_current, val); + wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + /* * The vendor and possibly platform specific bits which can be modified in * x86_spec_ctrl_base. @@ -1272,7 +1290,7 @@ static void __init spectre_v2_select_mitigation(void) if (spectre_v2_in_eibrs_mode(mode)) { /* Force it so VMEXIT will restore correctly */ x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base); } switch (mode) { @@ -1327,7 +1345,7 @@ static void __init spectre_v2_select_mitigation(void) static void update_stibp_msr(void * __unused) { - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base); } /* Update x86_spec_ctrl_base in case SMT state changed. */ @@ -1570,7 +1588,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base); } } @@ -1821,7 +1839,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index f2f733bcb2b959..7527762987358f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -584,7 +584,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - wrmsrl(MSR_IA32_SPEC_CTRL, msr); + write_spec_ctrl_current(msr); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) From 62b4db57eefec6ab544749a4f0178c4f68bad835 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Sat, 9 Jul 2022 23:42:53 -0300 Subject: [PATCH 0708/1056] x86/entry: Add kernel IBRS implementation commit 2dbb887e875b1de3ca8f40ddf26bcfe55798c609 upstream. Implement Kernel IBRS - currently the only known option to mitigate RSB underflow speculation issues on Skylake hardware. Note: since IBRS_ENTER requires fuller context established than UNTRAIN_RET, it must be placed after it. However, since UNTRAIN_RET itself implies a RET, it must come after IBRS_ENTER. This means IBRS_ENTER needs to also move UNTRAIN_RET. Note 2: KERNEL_IBRS is sub-optimal for XenPV. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: conflict at arch/x86/entry/entry_64_compat.S] [cascardo: conflict fixups, no ANNOTATE_NOENDBR] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/calling.h | 58 ++++++++++++++++++++++++++++++ arch/x86/entry/entry_64.S | 44 ++++++++++++++++++++--- arch/x86/entry/entry_64_compat.S | 17 ++++++--- arch/x86/include/asm/cpufeatures.h | 2 +- 4 files changed, 111 insertions(+), 10 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 2b12c691fbde4c..99d2410c18d1c2 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -7,6 +7,8 @@ #include #include #include +#include +#include /* @@ -281,6 +283,62 @@ For 32-bit we have the following conventions - kernel is built with #endif +/* + * IBRS kernel mitigation for Spectre_v2. + * + * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers + * the regs it uses (AX, CX, DX). Must be called before the first RET + * instruction (NOTE! UNTRAIN_RET includes a RET instruction) + * + * The optional argument is used to save/restore the current value, + * which is used on the paranoid paths. + * + * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. + */ +.macro IBRS_ENTER save_reg + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + rdmsr + shl $32, %rdx + or %rdx, %rax + mov %rax, \save_reg + test $SPEC_CTRL_IBRS, %eax + jz .Ldo_wrmsr_\@ + lfence + jmp .Lend_\@ +.Ldo_wrmsr_\@: +.endif + + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +.endm + +/* + * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) + * regs. Must be called after the last RET. + */ +.macro IBRS_EXIT save_reg + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + mov \save_reg, %rdx +.else + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + andl $(~SPEC_CTRL_IBRS), %edx +.endif + + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +.endm + /* * Mitigate Spectre v1 for conditional swapgs code paths. * diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index cf08173d209ab4..ebeeb014320943 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -94,7 +94,6 @@ SYM_CODE_START(entry_SYSCALL_64) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) - UNTRAIN_RET /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ @@ -111,6 +110,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) movq %rsp, %rdi /* Sign extend the lower 32bit as syscall numbers are treated as int */ movslq %eax, %rsi + + /* clobbers %rax, make sure it is after saving the syscall nr */ + IBRS_ENTER + UNTRAIN_RET + call do_syscall_64 /* returns with IRQs disabled */ /* @@ -190,6 +194,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) * perf profiles. Nothing jumps here. */ syscall_return_via_sysret: + IBRS_EXIT POP_REGS pop_rdi=0 /* @@ -582,6 +587,7 @@ __irqentry_text_end: SYM_CODE_START_LOCAL(common_interrupt_return) SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) + IBRS_EXIT #ifdef CONFIG_DEBUG_ENTRY /* Assert that pt_regs indicates user mode. */ testb $3, CS(%rsp) @@ -861,6 +867,9 @@ SYM_CODE_END(xen_failsafe_callback) * 1 -> no SWAPGS on exit * * Y GSBASE value at entry, must be restored in paranoid_exit + * + * R14 - old CR3 + * R15 - old SPEC_CTRL */ SYM_CODE_START_LOCAL(paranoid_entry) UNWIND_HINT_FUNC @@ -884,7 +893,6 @@ SYM_CODE_START_LOCAL(paranoid_entry) * be retrieved from a kernel internal table. */ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 - UNTRAIN_RET /* * Handling GSBASE depends on the availability of FSGSBASE. @@ -906,7 +914,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) * is needed here. */ SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx - RET + jmp .Lparanoid_gsbase_done .Lparanoid_entry_checkgs: /* EBX = 1 -> kernel GSBASE active, no restore required */ @@ -925,8 +933,16 @@ SYM_CODE_START_LOCAL(paranoid_entry) xorl %ebx, %ebx swapgs .Lparanoid_kernel_gsbase: - FENCE_SWAPGS_KERNEL_ENTRY +.Lparanoid_gsbase_done: + + /* + * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like + * CR3 above, keep the old value in a callee saved register. + */ + IBRS_ENTER save_reg=%r15 + UNTRAIN_RET + RET SYM_CODE_END(paranoid_entry) @@ -948,9 +964,19 @@ SYM_CODE_END(paranoid_entry) * 1 -> no SWAPGS on exit * * Y User space GSBASE, must be restored unconditionally + * + * R14 - old CR3 + * R15 - old SPEC_CTRL */ SYM_CODE_START_LOCAL(paranoid_exit) UNWIND_HINT_REGS + + /* + * Must restore IBRS state before both CR3 and %GS since we need access + * to the per-CPU x86_spec_ctrl_shadow variable. + */ + IBRS_EXIT save_reg=%r15 + /* * The order of operations is important. RESTORE_CR3 requires * kernel GSBASE. @@ -995,10 +1021,12 @@ SYM_CODE_START_LOCAL(error_entry) FENCE_SWAPGS_USER_ENTRY /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER UNTRAIN_RET leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ .Lerror_entry_from_usermode_after_swapgs: + /* Put us onto the real thread stack. */ call sync_regs RET @@ -1048,6 +1076,7 @@ SYM_CODE_START_LOCAL(error_entry) SWAPGS FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER UNTRAIN_RET /* @@ -1143,7 +1172,6 @@ SYM_CODE_START(asm_exc_nmi) movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp UNWIND_HINT_IRET_REGS base=%rdx offset=8 - UNTRAIN_RET pushq 5*8(%rdx) /* pt_regs->ss */ pushq 4*8(%rdx) /* pt_regs->rsp */ pushq 3*8(%rdx) /* pt_regs->flags */ @@ -1154,6 +1182,9 @@ SYM_CODE_START(asm_exc_nmi) PUSH_AND_CLEAR_REGS rdx=(%rdx) ENCODE_FRAME_POINTER + IBRS_ENTER + UNTRAIN_RET + /* * At this point we no longer need to worry about stack damage * due to nesting -- we're on the normal thread stack and we're @@ -1376,6 +1407,9 @@ end_repeat_nmi: movq $-1, %rsi call exc_nmi + /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ + IBRS_EXIT save_reg=%r15 + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 007f3a1fbf1a2c..e2f25403ec3072 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -4,7 +4,6 @@ * * Copyright 2000-2002 Andi Kleen, SuSE Labs. */ -#include "calling.h" #include #include #include @@ -18,6 +17,8 @@ #include #include +#include "calling.h" + .section .entry.text, "ax" /* @@ -72,7 +73,6 @@ SYM_CODE_START(entry_SYSENTER_compat) pushq $__USER32_CS /* pt_regs->cs */ pushq $0 /* pt_regs->ip = 0 (placeholder) */ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) - UNTRAIN_RET /* * User tracing code (ptrace or signal handlers) might assume that @@ -114,6 +114,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) cld + IBRS_ENTER + UNTRAIN_RET + /* * SYSENTER doesn't filter flags, so we need to clear NT and AC * ourselves. To save a few cycles, we can check whether @@ -213,7 +216,6 @@ SYM_CODE_START(entry_SYSCALL_compat) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) - UNTRAIN_RET /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ @@ -255,6 +257,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) UNWIND_HINT_REGS + IBRS_ENTER + UNTRAIN_RET + movq %rsp, %rdi call do_fast_syscall_32 /* XEN PV guests always use IRET path */ @@ -269,6 +274,8 @@ sysret32_from_system_call: */ STACKLEAK_ERASE + IBRS_EXIT + movq RBX(%rsp), %rbx /* pt_regs->rbx */ movq RBP(%rsp), %rbp /* pt_regs->rbp */ movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */ @@ -380,7 +387,6 @@ SYM_CODE_START(entry_INT80_compat) pushq (%rdi) /* pt_regs->di */ .Lint80_keep_stack: - UNTRAIN_RET pushq %rsi /* pt_regs->si */ xorl %esi, %esi /* nospec si */ pushq %rdx /* pt_regs->dx */ @@ -413,6 +419,9 @@ SYM_CODE_START(entry_INT80_compat) cld + IBRS_ENTER + UNTRAIN_RET + movq %rsp, %rdi call do_int80_syscall_32 jmp swapgs_restore_regs_and_return_to_usermode diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 7f1fb9ad66b7a1..46fa58a462aca4 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -203,7 +203,7 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ /* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -/* FREE! ( 7*32+12) */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ /* FREE! ( 7*32+13) */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ From e03415eeba781f0b7568d359e5f161a7b3a5c7c7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:54 +0200 Subject: [PATCH 0709/1056] x86/bugs: Optimize SPEC_CTRL MSR writes commit c779bc1a9002fa474175b80e72b85c9bf628abb0 upstream. When changing SPEC_CTRL for user control, the WRMSR can be delayed until return-to-user when KERNEL_IBRS has been enabled. This avoids an MSR write during context switch. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 2 +- arch/x86/kernel/cpu/bugs.c | 18 ++++++++++++------ arch/x86/kernel/process.c | 2 +- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8b34e34b56236b..de512b73bc29e4 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -254,7 +254,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; -extern void write_spec_ctrl_current(u64 val); +extern void write_spec_ctrl_current(u64 val, bool force); /* * With retpoline, we must use IBRS to restrict branch prediction diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index eae8ff8067fa25..ae7715385153ff 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -63,13 +63,19 @@ static DEFINE_MUTEX(spec_ctrl_mutex); * Keep track of the SPEC_CTRL MSR value for the current task, which may differ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). */ -void write_spec_ctrl_current(u64 val) +void write_spec_ctrl_current(u64 val, bool force) { if (this_cpu_read(x86_spec_ctrl_current) == val) return; this_cpu_write(x86_spec_ctrl_current, val); - wrmsrl(MSR_IA32_SPEC_CTRL, val); + + /* + * When KERNEL_IBRS this MSR is written on return-to-user, unless + * forced the update can be delayed until that time. + */ + if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + wrmsrl(MSR_IA32_SPEC_CTRL, val); } /* @@ -1290,7 +1296,7 @@ static void __init spectre_v2_select_mitigation(void) if (spectre_v2_in_eibrs_mode(mode)) { /* Force it so VMEXIT will restore correctly */ x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - write_spec_ctrl_current(x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); } switch (mode) { @@ -1345,7 +1351,7 @@ static void __init spectre_v2_select_mitigation(void) static void update_stibp_msr(void * __unused) { - write_spec_ctrl_current(x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); } /* Update x86_spec_ctrl_base in case SMT state changed. */ @@ -1588,7 +1594,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - write_spec_ctrl_current(x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); } } @@ -1839,7 +1845,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - write_spec_ctrl_current(x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7527762987358f..8d9d72fc27a209 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -584,7 +584,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - write_spec_ctrl_current(msr); + write_spec_ctrl_current(msr, false); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) From 347d0bf6b0c020ca0284b3fd96652470e06bd084 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 14 Jun 2022 23:15:55 +0200 Subject: [PATCH 0710/1056] x86/speculation: Add spectre_v2=ibrs option to support Kernel IBRS commit 7c693f54c873691a4b7da05c7e0f74e67745d144 upstream. Extend spectre_v2= boot option with Kernel IBRS. [jpoimboe: no STIBP with IBRS] Signed-off-by: Pawan Gupta Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 1 + arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 66 +++++++++++++++---- 3 files changed, 54 insertions(+), 14 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 998420529fbc01..26cfb32c21313d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5335,6 +5335,7 @@ eibrs - enhanced IBRS eibrs,retpoline - enhanced IBRS + Retpolines eibrs,lfence - enhanced IBRS + LFENCE + ibrs - use IBRS to protect kernel Not specifying this option is equivalent to spectre_v2=auto. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index de512b73bc29e4..a45799bed28144 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -212,6 +212,7 @@ enum spectre_v2_mitigation { SPECTRE_V2_EIBRS, SPECTRE_V2_EIBRS_RETPOLINE, SPECTRE_V2_EIBRS_LFENCE, + SPECTRE_V2_IBRS, }; /* The indirect branch speculation control variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ae7715385153ff..827a2877a718ab 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -965,6 +965,7 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_EIBRS, SPECTRE_V2_CMD_EIBRS_RETPOLINE, SPECTRE_V2_CMD_EIBRS_LFENCE, + SPECTRE_V2_CMD_IBRS, }; enum spectre_v2_user_cmd { @@ -1037,11 +1038,12 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) return SPECTRE_V2_USER_CMD_AUTO; } -static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) +static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) { - return (mode == SPECTRE_V2_EIBRS || - mode == SPECTRE_V2_EIBRS_RETPOLINE || - mode == SPECTRE_V2_EIBRS_LFENCE); + return mode == SPECTRE_V2_IBRS || + mode == SPECTRE_V2_EIBRS || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_EIBRS_LFENCE; } static void __init @@ -1106,12 +1108,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) } /* - * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not - * required. + * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible, + * STIBP is not required. */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + spectre_v2_in_ibrs_mode(spectre_v2_enabled)) return; /* @@ -1143,6 +1145,7 @@ static const char * const spectre_v2_strings[] = { [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", + [SPECTRE_V2_IBRS] = "Mitigation: IBRS", }; static const struct { @@ -1160,6 +1163,7 @@ static const struct { { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, { "auto", SPECTRE_V2_CMD_AUTO, false }, + { "ibrs", SPECTRE_V2_CMD_IBRS, false }, }; static void __init spec_v2_print_cond(const char *reason, bool secure) @@ -1222,6 +1226,24 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { + pr_err("%s selected but not Intel CPU. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) { + pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) { + pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + spec_v2_print_cond(mitigation_options[i].option, mitigation_options[i].secure); return cmd; @@ -1261,6 +1283,14 @@ static void __init spectre_v2_select_mitigation(void) break; } + if (boot_cpu_has_bug(X86_BUG_RETBLEED) && + retbleed_cmd != RETBLEED_CMD_OFF && + boot_cpu_has(X86_FEATURE_IBRS) && + boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + mode = SPECTRE_V2_IBRS; + break; + } + mode = spectre_v2_select_retpoline(); break; @@ -1277,6 +1307,10 @@ static void __init spectre_v2_select_mitigation(void) mode = spectre_v2_select_retpoline(); break; + case SPECTRE_V2_CMD_IBRS: + mode = SPECTRE_V2_IBRS; + break; + case SPECTRE_V2_CMD_EIBRS: mode = SPECTRE_V2_EIBRS; break; @@ -1293,7 +1327,7 @@ static void __init spectre_v2_select_mitigation(void) if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); - if (spectre_v2_in_eibrs_mode(mode)) { + if (spectre_v2_in_ibrs_mode(mode)) { /* Force it so VMEXIT will restore correctly */ x86_spec_ctrl_base |= SPEC_CTRL_IBRS; write_spec_ctrl_current(x86_spec_ctrl_base, true); @@ -1304,6 +1338,10 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_EIBRS: break; + case SPECTRE_V2_IBRS: + setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS); + break; + case SPECTRE_V2_LFENCE: case SPECTRE_V2_EIBRS_LFENCE: setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); @@ -1330,17 +1368,17 @@ static void __init spectre_v2_select_mitigation(void) pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); /* - * Retpoline means the kernel is safe because it has no indirect - * branches. Enhanced IBRS protects firmware too, so, enable restricted - * speculation around firmware calls only when Enhanced IBRS isn't - * supported. + * Retpoline protects the kernel, but doesn't protect firmware. IBRS + * and Enhanced IBRS protect firmware too, so enable IBRS around + * firmware calls only when IBRS / Enhanced IBRS aren't otherwise + * enabled. * * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because * the user might select retpoline on the kernel command line and if * the CPU supports Enhanced IBRS, kernel might un-intentionally not * enable IBRS around firmware calls. */ - if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { + if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } @@ -2082,7 +2120,7 @@ static ssize_t mmio_stale_data_show_state(char *buf) static char *stibp_state(void) { - if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + if (spectre_v2_in_ibrs_mode(spectre_v2_enabled)) return ""; switch (spectre_v2_user_stibp) { From b9349805e65380c3de8fff90a05472e131ce96f8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:56 +0200 Subject: [PATCH 0711/1056] x86/bugs: Split spectre_v2_select_mitigation() and spectre_v2_user_select_mitigation() commit 166115c08a9b0b846b783088808a27d739be6e8d upstream. retbleed will depend on spectre_v2, while spectre_v2_user depends on retbleed. Break this cycle. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 827a2877a718ab..01c67bb99f9428 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -37,8 +37,9 @@ #include "cpu.h" static void __init spectre_v1_select_mitigation(void); -static void __init retbleed_select_mitigation(void); static void __init spectre_v2_select_mitigation(void); +static void __init retbleed_select_mitigation(void); +static void __init spectre_v2_user_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); @@ -145,13 +146,19 @@ void __init check_bugs(void) /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); + spectre_v2_select_mitigation(); + /* + * retbleed_select_mitigation() relies on the state set by + * spectre_v2_select_mitigation(); specifically it wants to know about + * spectre_v2=ibrs. + */ retbleed_select_mitigation(); /* - * spectre_v2_select_mitigation() relies on the state set by + * spectre_v2_user_select_mitigation() relies on the state set by * retbleed_select_mitigation(); specifically the STIBP selection is * forced for UNRET. */ - spectre_v2_select_mitigation(); + spectre_v2_user_select_mitigation(); ssb_select_mitigation(); l1tf_select_mitigation(); md_clear_select_mitigation(); @@ -1006,13 +1013,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure) pr_info("spectre_v2_user=%s forced on command line.\n", reason); } +static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd; + static enum spectre_v2_user_cmd __init -spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) +spectre_v2_parse_user_cmdline(void) { char arg[20]; int ret, i; - switch (v2_cmd) { + switch (spectre_v2_cmd) { case SPECTRE_V2_CMD_NONE: return SPECTRE_V2_USER_CMD_NONE; case SPECTRE_V2_CMD_FORCE: @@ -1047,7 +1056,7 @@ static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) } static void __init -spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) +spectre_v2_user_select_mitigation(void) { enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; bool smt_possible = IS_ENABLED(CONFIG_SMP); @@ -1060,7 +1069,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) cpu_smt_control == CPU_SMT_NOT_SUPPORTED) smt_possible = false; - cmd = spectre_v2_parse_user_cmdline(v2_cmd); + cmd = spectre_v2_parse_user_cmdline(); switch (cmd) { case SPECTRE_V2_USER_CMD_NONE: goto set_mode; @@ -1384,7 +1393,7 @@ static void __init spectre_v2_select_mitigation(void) } /* Set up IBPB and STIBP depending on the general spectre V2 command */ - spectre_v2_user_select_mitigation(cmd); + spectre_v2_cmd = cmd; } static void update_stibp_msr(void * __unused) From e51769df6228c57a33dc2546951b7e18c08d1b07 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jun 2022 13:48:58 +0200 Subject: [PATCH 0712/1056] x86/bugs: Report Intel retbleed vulnerability commit 6ad0ad2bf8a67e27d1f9d006a1dabb0e1c360cc3 upstream. Skylake suffers from RSB underflow speculation issues; report this vulnerability and it's mitigation (spectre_v2=ibrs). [jpoimboe: cleanups, eibrs] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/bugs.c | 39 +++++++++++++++++++++++++++----- arch/x86/kernel/cpu/common.c | 24 ++++++++++---------- 3 files changed, 46 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 7dc5a3306f37a7..320826cbfb6976 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -91,6 +91,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ #define ARCH_CAP_SSB_NO BIT(4) /* * Not susceptible to Speculative Store Bypass diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 01c67bb99f9428..cc4e71804ec232 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -783,12 +783,17 @@ static int __init nospectre_v1_cmdline(char *str) } early_param("nospectre_v1", nospectre_v1_cmdline); +static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; + #undef pr_fmt #define pr_fmt(fmt) "RETBleed: " fmt enum retbleed_mitigation { RETBLEED_MITIGATION_NONE, RETBLEED_MITIGATION_UNRET, + RETBLEED_MITIGATION_IBRS, + RETBLEED_MITIGATION_EIBRS, }; enum retbleed_mitigation_cmd { @@ -800,6 +805,8 @@ enum retbleed_mitigation_cmd { const char * const retbleed_strings[] = { [RETBLEED_MITIGATION_NONE] = "Vulnerable", [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", + [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", + [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", }; static enum retbleed_mitigation retbleed_mitigation __ro_after_init = @@ -842,6 +849,7 @@ early_param("retbleed", retbleed_parse_cmdline); #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" #define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" +#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" static void __init retbleed_select_mitigation(void) { @@ -858,12 +866,15 @@ static void __init retbleed_select_mitigation(void) case RETBLEED_CMD_AUTO: default: - if (!boot_cpu_has_bug(X86_BUG_RETBLEED)) - break; - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + + /* + * The Intel mitigation (IBRS) was already selected in + * spectre_v2_select_mitigation(). + */ + break; } @@ -893,15 +904,31 @@ static void __init retbleed_select_mitigation(void) break; } + /* + * Let IBRS trump all on Intel without affecting the effects of the + * retbleed= cmdline option. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + switch (spectre_v2_enabled) { + case SPECTRE_V2_IBRS: + retbleed_mitigation = RETBLEED_MITIGATION_IBRS; + break; + case SPECTRE_V2_EIBRS: + case SPECTRE_V2_EIBRS_RETPOLINE: + case SPECTRE_V2_EIBRS_LFENCE: + retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; + break; + default: + pr_err(RETBLEED_INTEL_MSG); + } + } + pr_info("%s\n", retbleed_strings[retbleed_mitigation]); } #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt -static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = - SPECTRE_V2_NONE; - static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = SPECTRE_V2_USER_NONE; static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d5488cff3e011f..39e9e84e75afa9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1127,24 +1127,24 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | - BIT(7) | BIT(0xB), MMIO), - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), + BIT(7) | BIT(0xB), MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), - VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), - VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO), - VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), @@ -1254,7 +1254,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !arch_cap_mmio_immune(ia32_cap)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); - if (cpu_matches(cpu_vuln_blacklist, RETBLEED)) + if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))) setup_force_cpu_bug(X86_BUG_RETBLEED); if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) From a1d912d7ac04d6a010c2b3a637a244e088d7f5bc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:58 +0200 Subject: [PATCH 0713/1056] intel_idle: Disable IBRS during long idle commit bf5835bcdb9635c97f85120dba9bfa21e111130f upstream. Having IBRS enabled while the SMT sibling is idle unnecessarily slows down the running sibling. OTOH, disabling IBRS around idle takes two MSR writes, which will increase the idle latency. Therefore, only disable IBRS around deeper idle states. Shallow idle states are bounded by the tick in duration, since NOHZ is not allowed for them by virtue of their short target residency. Only do this for mwait-driven idle, since that keeps interrupts disabled across idle, which makes disabling IBRS vs IRQ-entry a non-issue. Note: C6 is a random threshold, most importantly C1 probably shouldn't disable IBRS, benchmarking needed. Suggested-by: Tim Chen Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: no CPUIDLE_FLAG_IRQ_ENABLE] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 6 ++++ drivers/idle/intel_idle.c | 43 ++++++++++++++++++++++++---- 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index a45799bed28144..af354282b72047 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -256,6 +256,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; extern void write_spec_ctrl_current(u64 val, bool force); +extern u64 spec_ctrl_current(void); /* * With retpoline, we must use IBRS to restrict branch prediction diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index cc4e71804ec232..5d922aef5c3b7e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -79,6 +79,12 @@ void write_spec_ctrl_current(u64 val, bool force) wrmsrl(MSR_IA32_SPEC_CTRL, val); } +u64 spec_ctrl_current(void) +{ + return this_cpu_read(x86_spec_ctrl_current); +} +EXPORT_SYMBOL_GPL(spec_ctrl_current); + /* * The vendor and possibly platform specific bits which can be modified in * x86_spec_ctrl_base. diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index e6c543b5ee1dd6..376e631e80d697 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -47,11 +47,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -93,6 +95,12 @@ static unsigned int mwait_substates __initdata; */ #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) +/* + * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE + * above. + */ +#define CPUIDLE_FLAG_IBRS BIT(16) + /* * MWAIT takes an 8-bit "hint" in EAX "suggesting" * the C-state (top nibble) and sub-state (bottom nibble) @@ -132,6 +140,24 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev, return index; } +static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + bool smt_active = sched_smt_active(); + u64 spec_ctrl = spec_ctrl_current(); + int ret; + + if (smt_active) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + + ret = intel_idle(dev, drv, index); + + if (smt_active) + wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); + + return ret; +} + /** * intel_idle_s2idle - Ask the processor to enter the given idle state. * @dev: cpuidle device of the target CPU. @@ -653,7 +679,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C6", .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 85, .target_residency = 200, .enter = &intel_idle, @@ -661,7 +687,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C7s", .desc = "MWAIT 0x33", - .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 124, .target_residency = 800, .enter = &intel_idle, @@ -669,7 +695,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C8", .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 200, .target_residency = 800, .enter = &intel_idle, @@ -677,7 +703,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C9", .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 480, .target_residency = 5000, .enter = &intel_idle, @@ -685,7 +711,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C10", .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 890, .target_residency = 5000, .enter = &intel_idle, @@ -714,7 +740,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { { .name = "C6", .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 133, .target_residency = 600, .enter = &intel_idle, @@ -1574,6 +1600,11 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) /* Structure copy. */ drv->states[drv->state_count] = cpuidle_state_table[cstate]; + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && + cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) { + drv->states[drv->state_count].enter = intel_idle_ibrs; + } + if ((disabled_states_mask & BIT(drv->state_count)) || ((icpu->use_acpi || force_use_acpi) && intel_idle_off_by_default(mwait_hint) && From e894b7817e08a588f6046f3f7e20c05dd9c6e16f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:59 +0200 Subject: [PATCH 0714/1056] objtool: Update Retpoline validation commit 9bb2ec608a209018080ca262f771e6a9ff203b6f upstream. Update retpoline validation with the new CONFIG_RETPOLINE requirement of not having bare naked RET instructions. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: conflict fixup at arch/x86/xen/xen-head.S] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 6 ++++++ arch/x86/mm/mem_encrypt_boot.S | 2 ++ arch/x86/xen/xen-head.S | 1 + tools/objtool/check.c | 19 +++++++++++++------ 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index af354282b72047..8c0da9bdc1e01a 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -75,6 +75,12 @@ .popsection .endm +/* + * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions + * vs RETBleed validation. + */ +#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE + /* * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index d94dea450fa60b..9de3d900bc9271 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -66,6 +66,7 @@ SYM_FUNC_START(sme_encrypt_execute) pop %rbp /* Offset to __x86_return_thunk would be wrong here */ + ANNOTATE_UNRET_SAFE ret int3 SYM_FUNC_END(sme_encrypt_execute) @@ -154,6 +155,7 @@ SYM_FUNC_START(__enc_copy) pop %r15 /* Offset to __x86_return_thunk would be wrong here */ + ANNOTATE_UNRET_SAFE ret int3 .L__enc_copy_end: diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 565062932ef11d..38b73e7e54ba7c 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -70,6 +70,7 @@ SYM_CODE_START(hypercall_page) .rept (PAGE_SIZE / 32) UNWIND_HINT_FUNC .skip 31, 0x90 + ANNOTATE_UNRET_SAFE RET .endr diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a90f0d65771600..6cc7e31a3e6a0a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1894,8 +1894,9 @@ static int read_retpoline_hints(struct objtool_file *file) } if (insn->type != INSN_JUMP_DYNAMIC && - insn->type != INSN_CALL_DYNAMIC) { - WARN_FUNC("retpoline_safe hint not an indirect jump/call", + insn->type != INSN_CALL_DYNAMIC && + insn->type != INSN_RETURN) { + WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret", insn->sec, insn->offset); return -1; } @@ -3229,7 +3230,8 @@ static int validate_retpoline(struct objtool_file *file) for_each_insn(file, insn) { if (insn->type != INSN_JUMP_DYNAMIC && - insn->type != INSN_CALL_DYNAMIC) + insn->type != INSN_CALL_DYNAMIC && + insn->type != INSN_RETURN) continue; if (insn->retpoline_safe) @@ -3244,9 +3246,14 @@ static int validate_retpoline(struct objtool_file *file) if (!strcmp(insn->sec->name, ".init.text") && !module) continue; - WARN_FUNC("indirect %s found in RETPOLINE build", - insn->sec, insn->offset, - insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); + if (insn->type == INSN_RETURN) { + WARN_FUNC("'naked' return found in RETPOLINE build", + insn->sec, insn->offset); + } else { + WARN_FUNC("indirect %s found in RETPOLINE build", + insn->sec, insn->offset, + insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); + } warnings++; } From 87e6270183a4062b46c21bf483539eea7854df6a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:16:00 +0200 Subject: [PATCH 0715/1056] x86/xen: Rename SYS* entry points commit b75b7f8ef1148be1b9321ffc2f6c19238904b438 upstream. Native SYS{CALL,ENTER} entry points are called entry_SYS{CALL,ENTER}_{64,compat}, make sure the Xen versions are named consistently. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/setup.c | 6 +++--- arch/x86/xen/xen-asm.S | 20 ++++++++++---------- arch/x86/xen/xen-ops.h | 6 +++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 8bfc1033010770..1f80dd3a2dd4ab 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -922,7 +922,7 @@ void xen_enable_sysenter(void) if (!boot_cpu_has(sysenter_feature)) return; - ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); + ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat); if(ret != 0) setup_clear_cpu_cap(sysenter_feature); } @@ -931,7 +931,7 @@ void xen_enable_syscall(void) { int ret; - ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); + ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64); if (ret != 0) { printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); /* Pretty fatal; 64-bit userspace has no other @@ -940,7 +940,7 @@ void xen_enable_syscall(void) if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { ret = register_callback(CALLBACKTYPE_syscall32, - xen_syscall32_target); + xen_entry_SYSCALL_compat); if (ret != 0) setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 962d30ea01a22f..2cf22624012cd5 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -227,7 +227,7 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) */ /* Normal 64-bit system call target */ -SYM_CODE_START(xen_syscall_target) +SYM_CODE_START(xen_entry_SYSCALL_64) UNWIND_HINT_EMPTY popq %rcx popq %r11 @@ -241,12 +241,12 @@ SYM_CODE_START(xen_syscall_target) movq $__USER_CS, 1*8(%rsp) jmp entry_SYSCALL_64_after_hwframe -SYM_CODE_END(xen_syscall_target) +SYM_CODE_END(xen_entry_SYSCALL_64) #ifdef CONFIG_IA32_EMULATION /* 32-bit compat syscall target */ -SYM_CODE_START(xen_syscall32_target) +SYM_CODE_START(xen_entry_SYSCALL_compat) UNWIND_HINT_EMPTY popq %rcx popq %r11 @@ -260,10 +260,10 @@ SYM_CODE_START(xen_syscall32_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSCALL_compat_after_hwframe -SYM_CODE_END(xen_syscall32_target) +SYM_CODE_END(xen_entry_SYSCALL_compat) /* 32-bit compat sysenter target */ -SYM_CODE_START(xen_sysenter_target) +SYM_CODE_START(xen_entry_SYSENTER_compat) UNWIND_HINT_EMPTY /* * NB: Xen is polite and clears TF from EFLAGS for us. This means @@ -281,18 +281,18 @@ SYM_CODE_START(xen_sysenter_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSENTER_compat_after_hwframe -SYM_CODE_END(xen_sysenter_target) +SYM_CODE_END(xen_entry_SYSENTER_compat) #else /* !CONFIG_IA32_EMULATION */ -SYM_CODE_START(xen_syscall32_target) -SYM_CODE_START(xen_sysenter_target) +SYM_CODE_START(xen_entry_SYSCALL_compat) +SYM_CODE_START(xen_entry_SYSENTER_compat) UNWIND_HINT_EMPTY lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $0 jmp hypercall_iret -SYM_CODE_END(xen_sysenter_target) -SYM_CODE_END(xen_syscall32_target) +SYM_CODE_END(xen_entry_SYSENTER_compat) +SYM_CODE_END(xen_entry_SYSCALL_compat) #endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 8bc8b72a205d46..16aed4b121297c 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -10,10 +10,10 @@ /* These are code, but not functions. Defined in entry.S */ extern const char xen_failsafe_callback[]; -void xen_sysenter_target(void); +void xen_entry_SYSENTER_compat(void); #ifdef CONFIG_X86_64 -void xen_syscall_target(void); -void xen_syscall32_target(void); +void xen_entry_SYSCALL_64(void); +void xen_entry_SYSCALL_compat(void); #endif extern void *xen_initial_gdt; From 8894f699f187dd1bb2e62b683b15d064ba5fb583 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:16:01 +0200 Subject: [PATCH 0716/1056] x86/xen: Add UNTRAIN_RET commit d147553b64bad34d2f92cb7d8ba454ae95c3baac upstream. Ensure the Xen entry also passes through UNTRAIN_RET. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ebeeb014320943..f9afa650955f1f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -320,6 +320,12 @@ SYM_CODE_END(ret_from_fork) #endif .endm +SYM_CODE_START_LOCAL(xen_error_entry) + UNWIND_HINT_FUNC + UNTRAIN_RET + RET +SYM_CODE_END(xen_error_entry) + /** * idtentry_body - Macro to emit code calling the C function * @cfunc: C function to be called @@ -339,7 +345,7 @@ SYM_CODE_END(ret_from_fork) * switch the CR3. So it can skip invoking error_entry(). */ ALTERNATIVE "call error_entry; movq %rax, %rsp", \ - "", X86_FEATURE_XENPV + "call xen_error_entry", X86_FEATURE_XENPV ENCODE_FRAME_POINTER UNWIND_HINT_REGS From fd17a4254965fc44044b3d7f5be186880ed7a021 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:16:02 +0200 Subject: [PATCH 0717/1056] x86/bugs: Add retbleed=ibpb commit 3ebc170068885b6fc7bedda6c667bb2c4d533159 upstream. jmp2ret mitigates the easy-to-attack case at relatively low overhead. It mitigates the long speculation windows after a mispredicted RET, but it does not mitigate the short speculation window from arbitrary instruction boundaries. On Zen2, there is a chicken bit which needs setting, which mitigates "arbitrary instruction boundaries" down to just "basic block boundaries". But there is no fix for the short speculation window on basic block boundaries, other than to flush the entire BTB to evict all attacker predictions. On the spectrum of "fast & blurry" -> "safe", there is (on top of STIBP or no-SMT): 1) Nothing System wide open 2) jmp2ret May stop a script kiddy 3) jmp2ret+chickenbit Raises the bar rather further 4) IBPB Only thing which can count as "safe". Tentative numbers put IBPB-on-entry at a 2.5x hit on Zen2, and a 10x hit on Zen1 according to lmbench. [ bp: Fixup feature bit comments, document option, 32-bit build fix. ] Suggested-by: Andrew Cooper Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 3 ++ arch/x86/entry/Makefile | 2 +- arch/x86/entry/entry.S | 22 ++++++++++ arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/nospec-branch.h | 8 +++- arch/x86/kernel/cpu/bugs.c | 43 +++++++++++++++---- 6 files changed, 67 insertions(+), 13 deletions(-) create mode 100644 arch/x86/entry/entry.S diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 26cfb32c21313d..2c556a127979e9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4978,6 +4978,9 @@ disabling SMT if necessary for the full mitigation (only on Zen1 and older without STIBP). + ibpb - mitigate short speculation windows on + basic block boundaries too. Safe, highest + perf impact. unret - force enable untrained return thunks, only effective on AMD f15h-f17h based systems. diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 7fec5dcf643868..eeadbd7d92cc55 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -11,7 +11,7 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) CFLAGS_common.o += -fno-stack-protector -obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o +obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o obj-y += common.o obj-y += vdso/ diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S new file mode 100644 index 00000000000000..bfb7bcb362bcfc --- /dev/null +++ b/arch/x86/entry/entry.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common place for both 32- and 64-bit entry routines. + */ + +#include +#include +#include + +.pushsection .noinstr.text, "ax" + +SYM_FUNC_START(entry_ibpb) + movl $MSR_IA32_PRED_CMD, %ecx + movl $PRED_CMD_IBPB, %eax + xorl %edx, %edx + wrmsr + RET +SYM_FUNC_END(entry_ibpb) +/* For KVM */ +EXPORT_SYMBOL_GPL(entry_ibpb); + +.popsection diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 46fa58a462aca4..c61654bc13aa4c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -294,7 +294,7 @@ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ -/* FREE! (11*32+10) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ /* FREE! (11*32+11) */ #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8c0da9bdc1e01a..e4bb6dbc5ce85f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -123,14 +123,17 @@ * return thunk isn't mapped into the userspace tables (then again, AMD * typically has NO_MELTDOWN). * - * Doesn't clobber any registers but does require a stable stack. + * While zen_untrain_ret() doesn't clobber anything but requires stack, + * entry_ibpb() will clobber AX, CX, DX. * * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point * where we have a stack but before any RET instruction. */ .macro UNTRAIN_RET #ifdef CONFIG_RETPOLINE - ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET + ALTERNATIVE_2 "", \ + "call zen_untrain_ret", X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif .endm @@ -144,6 +147,7 @@ extern void __x86_return_thunk(void); extern void zen_untrain_ret(void); +extern void entry_ibpb(void); #ifdef CONFIG_RETPOLINE diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5d922aef5c3b7e..86d46f14177653 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -798,6 +798,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = enum retbleed_mitigation { RETBLEED_MITIGATION_NONE, RETBLEED_MITIGATION_UNRET, + RETBLEED_MITIGATION_IBPB, RETBLEED_MITIGATION_IBRS, RETBLEED_MITIGATION_EIBRS, }; @@ -806,11 +807,13 @@ enum retbleed_mitigation_cmd { RETBLEED_CMD_OFF, RETBLEED_CMD_AUTO, RETBLEED_CMD_UNRET, + RETBLEED_CMD_IBPB, }; const char * const retbleed_strings[] = { [RETBLEED_MITIGATION_NONE] = "Vulnerable", [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", + [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB", [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", }; @@ -840,6 +843,8 @@ static int __init retbleed_parse_cmdline(char *str) retbleed_cmd = RETBLEED_CMD_AUTO; } else if (!strcmp(str, "unret")) { retbleed_cmd = RETBLEED_CMD_UNRET; + } else if (!strcmp(str, "ibpb")) { + retbleed_cmd = RETBLEED_CMD_IBPB; } else if (!strcmp(str, "nosmt")) { retbleed_nosmt = true; } else { @@ -854,11 +859,13 @@ static int __init retbleed_parse_cmdline(char *str) early_param("retbleed", retbleed_parse_cmdline); #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" -#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" +#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to IBPB!\n" #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" static void __init retbleed_select_mitigation(void) { + bool mitigate_smt = false; + if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) return; @@ -870,11 +877,21 @@ static void __init retbleed_select_mitigation(void) retbleed_mitigation = RETBLEED_MITIGATION_UNRET; break; + case RETBLEED_CMD_IBPB: + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + break; + case RETBLEED_CMD_AUTO: default: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { + + if (IS_ENABLED(CONFIG_RETPOLINE) && + IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + else + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } /* * The Intel mitigation (IBRS) was already selected in @@ -890,26 +907,34 @@ static void __init retbleed_select_mitigation(void) if (!IS_ENABLED(CONFIG_RETPOLINE) || !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) { pr_err(RETBLEED_COMPILER_MSG); - retbleed_mitigation = RETBLEED_MITIGATION_NONE; - break; + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + goto retbleed_force_ibpb; } setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_UNRET); - if (!boot_cpu_has(X86_FEATURE_STIBP) && - (retbleed_nosmt || cpu_mitigations_auto_nosmt())) - cpu_smt_disable(false); - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) pr_err(RETBLEED_UNTRAIN_MSG); + + mitigate_smt = true; + break; + + case RETBLEED_MITIGATION_IBPB: +retbleed_force_ibpb: + setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + mitigate_smt = true; break; default: break; } + if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) && + (retbleed_nosmt || cpu_mitigations_auto_nosmt())) + cpu_smt_disable(false); + /* * Let IBRS trump all on Intel without affecting the effects of the * retbleed= cmdline option. From 469d9b1570177275983f2f987be279f43bfb1c42 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 15:07:19 -0700 Subject: [PATCH 0718/1056] x86/bugs: Do IBPB fallback check only once commit 0fe4aeea9c01baabecc8c3afc7889c809d939bc2 upstream. When booting with retbleed=auto, if the kernel wasn't built with CONFIG_CC_HAS_RETURN_THUNK, the mitigation falls back to IBPB. Make sure a warning is printed in that case. The IBPB fallback check is done twice, but it really only needs to be done once. Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 86d46f14177653..8401fe92e2d20a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -884,18 +884,13 @@ static void __init retbleed_select_mitigation(void) case RETBLEED_CMD_AUTO: default: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - - if (IS_ENABLED(CONFIG_RETPOLINE) && - IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) - retbleed_mitigation = RETBLEED_MITIGATION_UNRET; - else - retbleed_mitigation = RETBLEED_MITIGATION_IBPB; - } + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; /* - * The Intel mitigation (IBRS) was already selected in - * spectre_v2_select_mitigation(). + * The Intel mitigation (IBRS or eIBRS) was already selected in + * spectre_v2_select_mitigation(). 'retbleed_mitigation' will + * be set accordingly below. */ break; From dab72c3c32ccde4f362fff993759b0133448d394 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:16:03 +0200 Subject: [PATCH 0719/1056] objtool: Add entry UNRET validation commit a09a6e2399ba0595c3042b3164f3ca68a3cff33e upstream. Since entry asm is tricky, add a validation pass that ensures the retbleed mitigation has been done before the first actual RET instruction. Entry points are those that either have UNWIND_HINT_ENTRY, which acts as UNWIND_HINT_EMPTY but marks the instruction as an entry point, or those that have UWIND_HINT_IRET_REGS at +0. This is basically a variant of validate_branch() that is intra-function and it will simply follow all branches from marked entry points and ensures that all paths lead to ANNOTATE_UNRET_END. If a path hits RET or an indirection the path is a fail and will be reported. There are 3 ANNOTATE_UNRET_END instances: - UNTRAIN_RET itself - exception from-kernel; this path doesn't need UNTRAIN_RET - all early exceptions; these also don't need UNTRAIN_RET Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: tools/objtool/builtin-check.c no link option validation] [cascardo: tools/objtool/check.c opts.ibt is ibt] [cascardo: tools/objtool/include/objtool/builtin.h leave unret option as bool, no struct opts] [cascardo: objtool is still called from scripts/link-vmlinux.sh] [cascardo: no IBT support] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 3 +- arch/x86/entry/entry_64_compat.S | 6 +- arch/x86/include/asm/nospec-branch.h | 12 ++ arch/x86/include/asm/unwind_hints.h | 4 + arch/x86/kernel/head_64.S | 5 + arch/x86/xen/xen-asm.S | 10 +- include/linux/objtool.h | 3 + scripts/link-vmlinux.sh | 3 + tools/include/linux/objtool.h | 3 + tools/objtool/builtin-check.c | 3 +- tools/objtool/check.c | 172 +++++++++++++++++++++++- tools/objtool/include/objtool/builtin.h | 2 +- tools/objtool/include/objtool/check.h | 6 + 13 files changed, 217 insertions(+), 15 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f9afa650955f1f..de3d28ced917b0 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -85,7 +85,7 @@ */ SYM_CODE_START(entry_SYSCALL_64) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY swapgs /* tss.sp2 is scratch space. */ @@ -1067,6 +1067,7 @@ SYM_CODE_START_LOCAL(error_entry) .Lerror_entry_done_lfence: FENCE_SWAPGS_KERNEL_ENTRY leaq 8(%rsp), %rax /* return pt_regs pointer */ + ANNOTATE_UNRET_END RET .Lbstep_iret: diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index e2f25403ec3072..4d637a965efbe5 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -49,7 +49,7 @@ * 0(%ebp) arg6 */ SYM_CODE_START(entry_SYSENTER_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY /* Interrupts are off on entry. */ SWAPGS @@ -202,7 +202,7 @@ SYM_CODE_END(entry_SYSENTER_compat) * 0(%esp) arg6 */ SYM_CODE_START(entry_SYSCALL_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY /* Interrupts are off on entry. */ swapgs @@ -349,7 +349,7 @@ SYM_CODE_END(entry_SYSCALL_compat) * ebp arg6 */ SYM_CODE_START(entry_INT80_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY /* * Interrupts are off on entry. */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index e4bb6dbc5ce85f..b3b26674f17da5 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -81,6 +81,17 @@ */ #define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE +/* + * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should + * eventually turn into it's own annotation. + */ +.macro ANNOTATE_UNRET_END +#ifdef CONFIG_DEBUG_ENTRY + ANNOTATE_RETPOLINE_SAFE + nop +#endif +.endm + /* * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 @@ -131,6 +142,7 @@ */ .macro UNTRAIN_RET #ifdef CONFIG_RETPOLINE + ANNOTATE_UNRET_END ALTERNATIVE_2 "", \ "call zen_untrain_ret", X86_FEATURE_UNRET, \ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 8e574c0afef80d..9cd8214175b524 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -11,6 +11,10 @@ UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 .endm +.macro UNWIND_HINT_ENTRY + UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1 +.endm + .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 .if \base == %rsp .if \indirect diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index d8b3ebd2bb85fe..81f1ae278718ed 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -312,6 +312,8 @@ SYM_CODE_END(start_cpu0) SYM_CODE_START_NOALIGN(vc_boot_ghcb) UNWIND_HINT_IRET_REGS offset=8 + ANNOTATE_UNRET_END + /* Build pt_regs */ PUSH_AND_CLEAR_REGS @@ -369,6 +371,7 @@ SYM_CODE_START(early_idt_handler_array) SYM_CODE_END(early_idt_handler_array) SYM_CODE_START_LOCAL(early_idt_handler_common) + ANNOTATE_UNRET_END /* * The stack is the hardware frame, an error code or zero, and the * vector number. @@ -415,6 +418,8 @@ SYM_CODE_END(early_idt_handler_common) SYM_CODE_START_NOALIGN(vc_no_ghcb) UNWIND_HINT_IRET_REGS offset=8 + ANNOTATE_UNRET_END + /* Build pt_regs */ PUSH_AND_CLEAR_REGS diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 2cf22624012cd5..1b757a1ee1bb62 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -120,7 +120,7 @@ SYM_FUNC_END(xen_read_cr2_direct); .macro xen_pv_trap name SYM_CODE_START(xen_\name) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY pop %rcx pop %r11 jmp \name @@ -228,7 +228,7 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) /* Normal 64-bit system call target */ SYM_CODE_START(xen_entry_SYSCALL_64) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY popq %rcx popq %r11 @@ -247,7 +247,7 @@ SYM_CODE_END(xen_entry_SYSCALL_64) /* 32-bit compat syscall target */ SYM_CODE_START(xen_entry_SYSCALL_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY popq %rcx popq %r11 @@ -264,7 +264,7 @@ SYM_CODE_END(xen_entry_SYSCALL_compat) /* 32-bit compat sysenter target */ SYM_CODE_START(xen_entry_SYSENTER_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY /* * NB: Xen is polite and clears TF from EFLAGS for us. This means * that we don't need to guard against single step exceptions here. @@ -287,7 +287,7 @@ SYM_CODE_END(xen_entry_SYSENTER_compat) SYM_CODE_START(xen_entry_SYSCALL_compat) SYM_CODE_START(xen_entry_SYSENTER_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $0 diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 7e72d975cb7611..7d1c3a9767094a 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -32,11 +32,14 @@ struct unwind_hint { * * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. * Useful for code which doesn't have an ELF function annotation. + * + * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. */ #define UNWIND_HINT_TYPE_CALL 0 #define UNWIND_HINT_TYPE_REGS 1 #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 #define UNWIND_HINT_TYPE_FUNC 3 +#define UNWIND_HINT_TYPE_ENTRY 4 #ifdef CONFIG_STACK_VALIDATION diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 59a3df87907ef1..3c2adb7563f1b5 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -120,6 +120,9 @@ objtool_link() if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then objtoolopt="${objtoolopt} --noinstr" + if is_enabled CONFIG_RETPOLINE; then + objtoolopt="${objtoolopt} --unret" + fi fi if [ -n "${objtoolopt}" ]; then diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index 7e72d975cb7611..7d1c3a9767094a 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -32,11 +32,14 @@ struct unwind_hint { * * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. * Useful for code which doesn't have an ELF function annotation. + * + * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. */ #define UNWIND_HINT_TYPE_CALL 0 #define UNWIND_HINT_TYPE_REGS 1 #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 #define UNWIND_HINT_TYPE_FUNC 3 +#define UNWIND_HINT_TYPE_ENTRY 4 #ifdef CONFIG_STACK_VALIDATION diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 38070f26105bd4..ea14dfcad60abd 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -20,7 +20,7 @@ #include bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup, sls; + validate_dup, vmlinux, mcount, noinstr, backup, sls, unret; static const char * const check_usage[] = { "objtool check [] file.o", @@ -36,6 +36,7 @@ const struct option check_options[] = { OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), + OPT_BOOLEAN(0, "unret", &unret, "validate entry unret placement"), OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"), diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 6cc7e31a3e6a0a..c22115e7138b89 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1847,6 +1847,19 @@ static int read_unwind_hints(struct objtool_file *file) insn->hint = true; + if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { + struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); + + if (sym && sym->bind == STB_GLOBAL) { + insn->entry = 1; + } + } + + if (hint->type == UNWIND_HINT_TYPE_ENTRY) { + hint->type = UNWIND_HINT_TYPE_CALL; + insn->entry = 1; + } + if (hint->type == UNWIND_HINT_TYPE_FUNC) { insn->cfi = &func_cfi; continue; @@ -1895,8 +1908,9 @@ static int read_retpoline_hints(struct objtool_file *file) if (insn->type != INSN_JUMP_DYNAMIC && insn->type != INSN_CALL_DYNAMIC && - insn->type != INSN_RETURN) { - WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret", + insn->type != INSN_RETURN && + insn->type != INSN_NOP) { + WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop", insn->sec, insn->offset); return -1; } @@ -2996,8 +3010,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 1; } - visited = 1 << state.uaccess; - if (insn->visited) { + visited = VISITED_BRANCH << state.uaccess; + if (insn->visited & VISITED_BRANCH_MASK) { if (!insn->hint && !insn_cfi_match(insn, &state.cfi)) return 1; @@ -3223,6 +3237,145 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) return warnings; } +/* + * Validate rethunk entry constraint: must untrain RET before the first RET. + * + * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes + * before an actual RET instruction. + */ +static int validate_entry(struct objtool_file *file, struct instruction *insn) +{ + struct instruction *next, *dest; + int ret, warnings = 0; + + for (;;) { + next = next_insn_to_validate(file, insn); + + if (insn->visited & VISITED_ENTRY) + return 0; + + insn->visited |= VISITED_ENTRY; + + if (!insn->ignore_alts && !list_empty(&insn->alts)) { + struct alternative *alt; + bool skip_orig = false; + + list_for_each_entry(alt, &insn->alts, list) { + if (alt->skip_orig) + skip_orig = true; + + ret = validate_entry(file, alt->insn); + if (ret) { + if (backtrace) + BT_FUNC("(alt)", insn); + return ret; + } + } + + if (skip_orig) + return 0; + } + + switch (insn->type) { + + case INSN_CALL_DYNAMIC: + case INSN_JUMP_DYNAMIC: + case INSN_JUMP_DYNAMIC_CONDITIONAL: + WARN_FUNC("early indirect call", insn->sec, insn->offset); + return 1; + + case INSN_JUMP_UNCONDITIONAL: + case INSN_JUMP_CONDITIONAL: + if (!is_sibling_call(insn)) { + if (!insn->jump_dest) { + WARN_FUNC("unresolved jump target after linking?!?", + insn->sec, insn->offset); + return -1; + } + ret = validate_entry(file, insn->jump_dest); + if (ret) { + if (backtrace) { + BT_FUNC("(branch%s)", insn, + insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : ""); + } + return ret; + } + + if (insn->type == INSN_JUMP_UNCONDITIONAL) + return 0; + + break; + } + + /* fallthrough */ + case INSN_CALL: + dest = find_insn(file, insn->call_dest->sec, + insn->call_dest->offset); + if (!dest) { + WARN("Unresolved function after linking!?: %s", + insn->call_dest->name); + return -1; + } + + ret = validate_entry(file, dest); + if (ret) { + if (backtrace) + BT_FUNC("(call)", insn); + return ret; + } + /* + * If a call returns without error, it must have seen UNTRAIN_RET. + * Therefore any non-error return is a success. + */ + return 0; + + case INSN_RETURN: + WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset); + return 1; + + case INSN_NOP: + if (insn->retpoline_safe) + return 0; + break; + + default: + break; + } + + if (!next) { + WARN_FUNC("teh end!", insn->sec, insn->offset); + return -1; + } + insn = next; + } + + return warnings; +} + +/* + * Validate that all branches starting at 'insn->entry' encounter UNRET_END + * before RET. + */ +static int validate_unret(struct objtool_file *file) +{ + struct instruction *insn; + int ret, warnings = 0; + + for_each_insn(file, insn) { + if (!insn->entry) + continue; + + ret = validate_entry(file, insn); + if (ret < 0) { + WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset); + return ret; + } + warnings += ret; + } + + return warnings; +} + static int validate_retpoline(struct objtool_file *file) { struct instruction *insn; @@ -3490,6 +3643,17 @@ int check(struct objtool_file *file) goto out; warnings += ret; + if (unret) { + /* + * Must be after validate_branch() and friends, it plays + * further games with insn->visited. + */ + ret = validate_unret(file); + if (ret < 0) + return ret; + warnings += ret; + } + if (!warnings) { ret = validate_reachable_instructions(file); if (ret < 0) diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 89ba869ed08fe4..1758a75f565b51 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -9,7 +9,7 @@ extern const struct option check_options[]; extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup, sls; + validate_dup, vmlinux, mcount, noinstr, backup, sls, unret; extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index 6cfff078897f5c..0de25d13ca9283 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -48,6 +48,7 @@ struct instruction { bool dead_end, ignore, ignore_alts; bool hint; bool retpoline_safe; + bool entry; s8 instr; u8 visited; struct alt_group *alt_group; @@ -62,6 +63,11 @@ struct instruction { struct cfi_state *cfi; }; +#define VISITED_BRANCH 0x01 +#define VISITED_BRANCH_UACCESS 0x02 +#define VISITED_BRANCH_MASK 0x03 +#define VISITED_ENTRY 0x04 + static inline bool is_static_jump(struct instruction *insn) { return insn->type == INSN_JUMP_CONDITIONAL || From 75d4fc2e965f89f5844637ead257f03ec6f1d269 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:16:04 +0200 Subject: [PATCH 0720/1056] x86/cpu/amd: Add Spectral Chicken commit d7caac991feeef1b871ee6988fd2c9725df09039 upstream. Zen2 uarchs have an undocumented, unnamed, MSR that contains a chicken bit for some speculation behaviour. It needs setting. Note: very belatedly AMD released naming; it's now officially called MSR_AMD64_DE_CFG2 and MSR_AMD64_DE_CFG2_SUPPRESS_NOBR_PRED_BIT but shall remain the SPECTRAL CHICKEN. Suggested-by: Andrew Cooper Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 3 +++ arch/x86/kernel/cpu/amd.c | 23 ++++++++++++++++++++++- arch/x86/kernel/cpu/cpu.h | 2 ++ arch/x86/kernel/cpu/hygon.c | 6 ++++++ 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 320826cbfb6976..aecc76b4dfbeb0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -515,6 +515,9 @@ /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 +#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 +#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4edb6f0f628c20..c3a23c398680c6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -886,6 +886,26 @@ static void init_amd_bd(struct cpuinfo_x86 *c) clear_rdrand_cpuid_bit(c); } +void init_spectral_chicken(struct cpuinfo_x86 *c) +{ + u64 value; + + /* + * On Zen2 we offer this chicken (bit) on the altar of Speculation. + * + * This suppresses speculation from the middle of a basic block, i.e. it + * suppresses non-branch predictions. + * + * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) { + if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) { + value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT; + wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); + } + } +} + static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); @@ -931,7 +951,8 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; case 0x16: init_amd_jg(c); break; - case 0x17: fallthrough; + case 0x17: init_spectral_chicken(c); + fallthrough; case 0x19: init_amd_zn(c); break; } diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 2a8e584fc99138..7c9b5893c30aba 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -61,6 +61,8 @@ static inline void tsx_init(void) { } static inline void tsx_ap_init(void) { } #endif /* CONFIG_CPU_SUP_INTEL */ +extern void init_spectral_chicken(struct cpuinfo_x86 *c); + extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 3fcdda4c1e114f..21fd425088fe58 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -302,6 +302,12 @@ static void init_hygon(struct cpuinfo_x86 *c) /* get apicid instead of initial apic id from cpuid */ c->apicid = hard_smp_processor_id(); + /* + * XXX someone from Hygon needs to confirm this DTRT + * + init_spectral_chicken(c); + */ + set_cpu_cap(c, X86_FEATURE_ZEN); set_cpu_cap(c, X86_FEATURE_CPB); From d427c1f83e8cab913c52d047b891ee895f989e61 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:05 +0200 Subject: [PATCH 0721/1056] x86/speculation: Fix RSB filling with CONFIG_RETPOLINE=n commit b2620facef4889fefcbf2e87284f34dcd4189bce upstream. If a kernel is built with CONFIG_RETPOLINE=n, but the user still wants to mitigate Spectre v2 using IBRS or eIBRS, the RSB filling will be silently disabled. There's nothing retpoline-specific about RSB buffer filling. Remove the CONFIG_RETPOLINE guards around it. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 2 -- arch/x86/entry/entry_64.S | 2 -- arch/x86/include/asm/nospec-branch.h | 2 -- 3 files changed, 6 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 00413e37feee15..5bd3baf36d874b 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -701,7 +701,6 @@ SYM_CODE_START(__switch_to_asm) movl %ebx, PER_CPU_VAR(__stack_chk_guard) #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -710,7 +709,6 @@ SYM_CODE_START(__switch_to_asm) * speculative execution to prevent attack. */ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* Restore flags or the incoming task to restore AC state. */ popfl diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index de3d28ced917b0..08b047b023ee47 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -248,7 +248,6 @@ SYM_FUNC_START(__switch_to_asm) movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -257,7 +256,6 @@ SYM_FUNC_START(__switch_to_asm) * speculative execution to prevent attack. */ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* restore callee-saved registers */ popq %r15 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index b3b26674f17da5..be520f18f0c56e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -122,11 +122,9 @@ * monstrosity above, manually. */ .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req -#ifdef CONFIG_RETPOLINE ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) .Lskip_rsb_\@: -#endif .endm /* From e898e28762f830592d723ddb34db8a86735a2e70 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:06 +0200 Subject: [PATCH 0722/1056] x86/speculation: Fix firmware entry SPEC_CTRL handling commit e6aa13622ea8283cc699cac5d018cc40a2ba2010 upstream. The firmware entry code may accidentally clear STIBP or SSBD. Fix that. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index be520f18f0c56e..1b2ce1efe97121 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -286,18 +286,16 @@ extern u64 spec_ctrl_current(void); */ #define firmware_restrict_branch_speculation_start() \ do { \ - u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ - \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current() | SPEC_CTRL_IBRS, \ X86_FEATURE_USE_IBRS_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - u64 val = x86_spec_ctrl_base; \ - \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current(), \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) From 2069bd0e6f5b0c4ded790ff34b8ee5df909b51e0 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:07 +0200 Subject: [PATCH 0723/1056] x86/speculation: Fix SPEC_CTRL write on SMT state change commit 56aa4d221f1ee2c3a49b45b800778ec6e0ab73c5 upstream. If the SMT state changes, SSBD might get accidentally disabled. Fix that. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8401fe92e2d20a..e4d615978f02e1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1451,7 +1451,8 @@ static void __init spectre_v2_select_mitigation(void) static void update_stibp_msr(void * __unused) { - write_spec_ctrl_current(x86_spec_ctrl_base, true); + u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); + write_spec_ctrl_current(val, true); } /* Update x86_spec_ctrl_base in case SMT state changed. */ From b55663e7521fe8257ad722206e5296f19b7ff0d7 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:08 +0200 Subject: [PATCH 0724/1056] x86/speculation: Use cached host SPEC_CTRL value for guest entry/exit commit bbb69e8bee1bd882784947095ffb2bfe0f7c9470 upstream. There's no need to recalculate the host value for every entry/exit. Just use the cached value in spec_ctrl_current(). Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e4d615978f02e1..89d6b5e615a067 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -208,7 +208,7 @@ void __init check_bugs(void) void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { - u64 msrval, guestval, hostval = x86_spec_ctrl_base; + u64 msrval, guestval, hostval = spec_ctrl_current(); struct thread_info *ti = current_thread_info(); /* Is MSR_SPEC_CTRL implemented ? */ @@ -221,15 +221,6 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) guestval = hostval & ~x86_spec_ctrl_mask; guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; - /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) - hostval |= ssbd_tif_to_spec_ctrl(ti->flags); - - /* Conditional STIBP enabled? */ - if (static_branch_unlikely(&switch_to_cond_stibp)) - hostval |= stibp_tif_to_spec_ctrl(ti->flags); - if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); @@ -1390,7 +1381,6 @@ static void __init spectre_v2_select_mitigation(void) pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); if (spectre_v2_in_ibrs_mode(mode)) { - /* Force it so VMEXIT will restore correctly */ x86_spec_ctrl_base |= SPEC_CTRL_IBRS; write_spec_ctrl_current(x86_spec_ctrl_base, true); } From 66b37dccbc2524b675bfb2b3f4281a943cc36169 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 17 Jun 2022 12:12:48 -0700 Subject: [PATCH 0725/1056] x86/speculation: Remove x86_spec_ctrl_mask commit acac5e98ef8d638a411cfa2ee676c87e1973f126 upstream. This mask has been made redundant by kvm_spec_ctrl_test_value(). And it doesn't even work when MSR interception is disabled, as the guest can just write to SPEC_CTRL directly. Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Reviewed-by: Paolo Bonzini Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 89d6b5e615a067..2d89f8eab6c691 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -85,12 +85,6 @@ u64 spec_ctrl_current(void) } EXPORT_SYMBOL_GPL(spec_ctrl_current); -/* - * The vendor and possibly platform specific bits which can be modified in - * x86_spec_ctrl_base. - */ -static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; - /* * AMD specific MSR info for Speculative Store Bypass control. * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu(). @@ -146,10 +140,6 @@ void __init check_bugs(void) if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - /* Allow STIBP in MSR_SPEC_CTRL if supported */ - if (boot_cpu_has(X86_FEATURE_STIBP)) - x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; - /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); @@ -208,19 +198,10 @@ void __init check_bugs(void) void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { - u64 msrval, guestval, hostval = spec_ctrl_current(); + u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); struct thread_info *ti = current_thread_info(); - /* Is MSR_SPEC_CTRL implemented ? */ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { - /* - * Restrict guest_spec_ctrl to supported values. Clear the - * modifiable bits in the host base value and or the - * modifiable bits from the guest value. - */ - guestval = hostval & ~x86_spec_ctrl_mask; - guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; - if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); @@ -1658,16 +1639,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) break; } - /* - * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper - * bit in the mask to allow guests to use the mitigation even in the - * case where the host does not enable it. - */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) { - x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; - } - /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. From c590fa2d2dac50e9e1ea6946506636617b2f8694 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 24 Jun 2022 12:52:40 +0200 Subject: [PATCH 0726/1056] objtool: Re-add UNWIND_HINT_{SAVE_RESTORE} commit 8faea26e611189e933ea2281975ff4dc7c1106b6 upstream. Commit c536ed2fffd5 ("objtool: Remove SAVE/RESTORE hints") removed the save/restore unwind hints because they were no longer needed. Now they're going to be needed again so re-add them. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/unwind_hints.h | 12 ++++++-- include/linux/objtool.h | 6 ++-- tools/include/linux/objtool.h | 6 ++-- tools/objtool/check.c | 40 +++++++++++++++++++++++++++ tools/objtool/include/objtool/check.h | 1 + 5 files changed, 59 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 9cd8214175b524..56664b31b6dad9 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -8,11 +8,11 @@ #ifdef __ASSEMBLY__ .macro UNWIND_HINT_EMPTY - UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 + UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1 .endm .macro UNWIND_HINT_ENTRY - UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1 + UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1 .endm .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 @@ -56,6 +56,14 @@ UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC .endm +.macro UNWIND_HINT_SAVE + UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE +.endm + +.macro UNWIND_HINT_RESTORE + UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE +.endm + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_UNWIND_HINTS_H */ diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 7d1c3a9767094a..a2042c41868648 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -40,6 +40,8 @@ struct unwind_hint { #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 #define UNWIND_HINT_TYPE_FUNC 3 #define UNWIND_HINT_TYPE_ENTRY 4 +#define UNWIND_HINT_TYPE_SAVE 5 +#define UNWIND_HINT_TYPE_RESTORE 6 #ifdef CONFIG_STACK_VALIDATION @@ -102,7 +104,7 @@ struct unwind_hint { * the debuginfo as necessary. It will also warn if it sees any * inconsistencies. */ -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .Lunwind_hint_ip_\@: .pushsection .discard.unwind_hints /* struct unwind_hint */ @@ -132,7 +134,7 @@ struct unwind_hint { #define STACK_FRAME_NON_STANDARD(func) #else #define ANNOTATE_INTRA_FUNCTION_CALL -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index 7d1c3a9767094a..a2042c41868648 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -40,6 +40,8 @@ struct unwind_hint { #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 #define UNWIND_HINT_TYPE_FUNC 3 #define UNWIND_HINT_TYPE_ENTRY 4 +#define UNWIND_HINT_TYPE_SAVE 5 +#define UNWIND_HINT_TYPE_RESTORE 6 #ifdef CONFIG_STACK_VALIDATION @@ -102,7 +104,7 @@ struct unwind_hint { * the debuginfo as necessary. It will also warn if it sees any * inconsistencies. */ -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .Lunwind_hint_ip_\@: .pushsection .discard.unwind_hints /* struct unwind_hint */ @@ -132,7 +134,7 @@ struct unwind_hint { #define STACK_FRAME_NON_STANDARD(func) #else #define ANNOTATE_INTRA_FUNCTION_CALL -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c22115e7138b89..4c18a7e283445d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1847,6 +1847,17 @@ static int read_unwind_hints(struct objtool_file *file) insn->hint = true; + if (hint->type == UNWIND_HINT_TYPE_SAVE) { + insn->hint = false; + insn->save = true; + continue; + } + + if (hint->type == UNWIND_HINT_TYPE_RESTORE) { + insn->restore = true; + continue; + } + if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); @@ -3025,6 +3036,35 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, state.instr += insn->instr; if (insn->hint) { + if (insn->restore) { + struct instruction *save_insn, *i; + + i = insn; + save_insn = NULL; + + sym_for_each_insn_continue_reverse(file, func, i) { + if (i->save) { + save_insn = i; + break; + } + } + + if (!save_insn) { + WARN_FUNC("no corresponding CFI save for CFI restore", + sec, insn->offset); + return 1; + } + + if (!save_insn->visited) { + WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo", + sec, insn->offset); + return 1; + } + + insn->cfi = save_insn->cfi; + nr_cfi_reused++; + } + state.cfi = *insn->cfi; } else { /* XXX track if we actually changed state.cfi */ diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index 0de25d13ca9283..4ba041db304f92 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -47,6 +47,7 @@ struct instruction { unsigned long immediate; bool dead_end, ignore, ignore_alts; bool hint; + bool save, restore; bool retpoline_safe; bool entry; s8 instr; From 852fe53040b973db272270a4a1ee1be0b51e112c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:11 +0200 Subject: [PATCH 0727/1056] KVM: VMX: Flatten __vmx_vcpu_run() commit 8bd200d23ec42d66ccd517a72dd0b9cc6132d2fd upstream. Move the vmx_vm{enter,exit}() functionality into __vmx_vcpu_run(). This will make it easier to do the spec_ctrl handling before the first RET. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov [cascardo: remove ENDBR] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/vmenter.S | 118 ++++++++++++++----------------------- 1 file changed, 45 insertions(+), 73 deletions(-) diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 435c187927c485..cd1cd516ec3d14 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -30,68 +30,6 @@ .section .noinstr.text, "ax" -/** - * vmx_vmenter - VM-Enter the current loaded VMCS - * - * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME - * - * Returns: - * %RFLAGS.CF is set on VM-Fail Invalid - * %RFLAGS.ZF is set on VM-Fail Valid - * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit - * - * Note that VMRESUME/VMLAUNCH fall-through and return directly if - * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump - * to vmx_vmexit. - */ -SYM_FUNC_START_LOCAL(vmx_vmenter) - /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */ - je 2f - -1: vmresume - RET - -2: vmlaunch - RET - -3: cmpb $0, kvm_rebooting - je 4f - RET -4: ud2 - - _ASM_EXTABLE(1b, 3b) - _ASM_EXTABLE(2b, 3b) - -SYM_FUNC_END(vmx_vmenter) - -/** - * vmx_vmexit - Handle a VMX VM-Exit - * - * Returns: - * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit - * - * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump - * here after hardware loads the host's state, i.e. this is the destination - * referred to by VMCS.HOST_RIP. - */ -SYM_FUNC_START(vmx_vmexit) -#ifdef CONFIG_RETPOLINE - ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE - /* Preserve guest's RAX, it's used to stuff the RSB. */ - push %_ASM_AX - - /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE - - /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */ - or $1, %_ASM_AX - - pop %_ASM_AX -.Lvmexit_skip_rsb: -#endif - RET -SYM_FUNC_END(vmx_vmexit) - /** * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) @@ -124,8 +62,7 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Copy @launched to BL, _ASM_ARG3 is volatile. */ mov %_ASM_ARG3B, %bl - /* Adjust RSP to account for the CALL to vmx_vmenter(). */ - lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2 + lea (%_ASM_SP), %_ASM_ARG2 call vmx_update_host_rsp /* Load @regs to RAX. */ @@ -154,11 +91,36 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX - /* Enter guest mode */ - call vmx_vmenter + /* Check EFLAGS.ZF from 'testb' above */ + je .Lvmlaunch + + /* + * After a successful VMRESUME/VMLAUNCH, control flow "magically" + * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting. + * So this isn't a typical function and objtool needs to be told to + * save the unwind state here and restore it below. + */ + UNWIND_HINT_SAVE + +/* + * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at + * the 'vmx_vmexit' label below. + */ +.Lvmresume: + vmresume + jmp .Lvmfail + +.Lvmlaunch: + vmlaunch + jmp .Lvmfail + + _ASM_EXTABLE(.Lvmresume, .Lfixup) + _ASM_EXTABLE(.Lvmlaunch, .Lfixup) - /* Jump on VM-Fail. */ - jbe 2f +SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) + + /* Restore unwind state from before the VMRESUME/VMLAUNCH. */ + UNWIND_HINT_RESTORE /* Temporarily save guest's RAX. */ push %_ASM_AX @@ -185,9 +147,13 @@ SYM_FUNC_START(__vmx_vcpu_run) mov %r15, VCPU_R15(%_ASM_AX) #endif + /* IMPORTANT: RSB must be stuffed before the first return. */ + FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ xor %eax, %eax +.Lclear_regs: /* * Clear all general purpose registers except RSP and RAX to prevent * speculative use of the guest's values, even those that are reloaded @@ -197,7 +163,7 @@ SYM_FUNC_START(__vmx_vcpu_run) * free. RSP and RAX are exempt as RSP is restored by hardware during * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. */ -1: xor %ecx, %ecx + xor %ecx, %ecx xor %edx, %edx xor %ebx, %ebx xor %ebp, %ebp @@ -216,8 +182,8 @@ SYM_FUNC_START(__vmx_vcpu_run) /* "POP" @regs. */ add $WORD_SIZE, %_ASM_SP - pop %_ASM_BX + pop %_ASM_BX #ifdef CONFIG_X86_64 pop %r12 pop %r13 @@ -230,9 +196,15 @@ SYM_FUNC_START(__vmx_vcpu_run) pop %_ASM_BP RET - /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */ -2: mov $1, %eax - jmp 1b +.Lfixup: + cmpb $0, kvm_rebooting + jne .Lvmfail + ud2 +.Lvmfail: + /* VM-Fail: set return value to 1 */ + mov $1, %eax + jmp .Lclear_regs + SYM_FUNC_END(__vmx_vcpu_run) From 5fde25284dfe9d3f12afdceec410cddb8d4b889a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:12 +0200 Subject: [PATCH 0728/1056] KVM: VMX: Convert launched argument to flags commit bb06650634d3552c0f8557e9d16aa1a408040e28 upstream. Convert __vmx_vcpu_run()'s 'launched' argument to 'flags', in preparation for doing SPEC_CTRL handling immediately after vmexit, which will need another flag. This is much easier than adding a fourth argument, because this code supports both 32-bit and 64-bit, and the fourth argument on 32-bit would have to be pushed on the stack. Note that __vmx_vcpu_run_flags() is called outside of the noinstr critical section because it will soon start calling potentially traceable functions. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/nested.c | 2 +- arch/x86/kvm/vmx/run_flags.h | 7 +++++++ arch/x86/kvm/vmx/vmenter.S | 9 +++++---- arch/x86/kvm/vmx/vmx.c | 17 ++++++++++++++--- arch/x86/kvm/vmx/vmx.h | 5 ++++- 5 files changed, 31 insertions(+), 9 deletions(-) create mode 100644 arch/x86/kvm/vmx/run_flags.h diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f7bdb62d6cece1..5f91aa62bdca0e 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3077,7 +3077,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) } vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, - vmx->loaded_vmcs->launched); + __vmx_vcpu_run_flags(vmx)); if (vmx->msr_autoload.host.nr) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h new file mode 100644 index 00000000000000..57f4c664ea9c13 --- /dev/null +++ b/arch/x86/kvm/vmx/run_flags.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __KVM_X86_VMX_RUN_FLAGS_H +#define __KVM_X86_VMX_RUN_FLAGS_H + +#define VMX_RUN_VMRESUME (1 << 0) + +#endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index cd1cd516ec3d14..95d3a47b157d23 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -5,6 +5,7 @@ #include #include #include +#include "run_flags.h" #define WORD_SIZE (BITS_PER_LONG / 8) @@ -34,7 +35,7 @@ * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) * @regs: unsigned long * (to guest registers) - * @launched: %true if the VMCS has been launched + * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH * * Returns: * 0 on VM-Exit, 1 on VM-Fail @@ -59,7 +60,7 @@ SYM_FUNC_START(__vmx_vcpu_run) */ push %_ASM_ARG2 - /* Copy @launched to BL, _ASM_ARG3 is volatile. */ + /* Copy @flags to BL, _ASM_ARG3 is volatile. */ mov %_ASM_ARG3B, %bl lea (%_ASM_SP), %_ASM_ARG2 @@ -69,7 +70,7 @@ SYM_FUNC_START(__vmx_vcpu_run) mov (%_ASM_SP), %_ASM_AX /* Check if vmlaunch or vmresume is needed */ - testb %bl, %bl + testb $VMX_RUN_VMRESUME, %bl /* Load guest registers. Don't clobber flags. */ mov VCPU_RCX(%_ASM_AX), %_ASM_CX @@ -92,7 +93,7 @@ SYM_FUNC_START(__vmx_vcpu_run) mov VCPU_RAX(%_ASM_AX), %_ASM_AX /* Check EFLAGS.ZF from 'testb' above */ - je .Lvmlaunch + jz .Lvmlaunch /* * After a successful VMRESUME/VMLAUNCH, control flow "magically" diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 54724f520556f5..bac08c265ddf63 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -835,6 +835,16 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) MSR_IA32_SPEC_CTRL); } +unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) +{ + unsigned int flags = 0; + + if (vmx->loaded_vmcs->launched) + flags |= VMX_RUN_VMRESUME; + + return flags; +} + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, unsigned long entry, unsigned long exit) { @@ -6667,7 +6677,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) } static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, - struct vcpu_vmx *vmx) + struct vcpu_vmx *vmx, + unsigned long flags) { kvm_guest_enter_irqoff(); @@ -6686,7 +6697,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, native_write_cr2(vcpu->arch.cr2); vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, - vmx->loaded_vmcs->launched); + flags); vcpu->arch.cr2 = native_read_cr2(); @@ -6786,7 +6797,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); /* The actual VMENTER/EXIT is in the .noinstr.text section. */ - vmx_vcpu_enter_exit(vcpu, vmx); + vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); /* * We do not use IBRS in the kernel. If this vCPU has used the diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 12fe7b31cbf686..35476115cbd0b9 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -13,6 +13,7 @@ #include "vmcs.h" #include "vmx_ops.h" #include "cpuid.h" +#include "run_flags.h" #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 @@ -382,7 +383,9 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); +unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, + unsigned int flags); int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); From 0cbd5905c8f3b5da498c21c4deee347622b3420b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:13 +0200 Subject: [PATCH 0729/1056] KVM: VMX: Prevent guest RSB poisoning attacks with eIBRS commit fc02735b14fff8c6678b521d324ade27b1a3d4cf upstream. On eIBRS systems, the returns in the vmexit return path from __vmx_vcpu_run() to vmx_vcpu_run() are exposed to RSB poisoning attacks. Fix that by moving the post-vmexit spec_ctrl handling to immediately after the vmexit. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 4 +++ arch/x86/kvm/vmx/run_flags.h | 1 + arch/x86/kvm/vmx/vmenter.S | 49 +++++++++++++++++++++------- arch/x86/kvm/vmx/vmx.c | 48 +++++++++++++++------------ arch/x86/kvm/vmx/vmx.h | 1 + 6 files changed, 73 insertions(+), 31 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 1b2ce1efe97121..a27d9880e4cd11 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -275,6 +275,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; +extern u64 x86_spec_ctrl_current; extern void write_spec_ctrl_current(u64 val, bool force); extern u64 spec_ctrl_current(void); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2d89f8eab6c691..cef4f1fa7b7f91 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -195,6 +195,10 @@ void __init check_bugs(void) #endif } +/* + * NOTE: For VMX, this function is not called in the vmexit path. + * It uses vmx_spec_ctrl_restore_host() instead. + */ void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h index 57f4c664ea9c13..edc3f16cc1896f 100644 --- a/arch/x86/kvm/vmx/run_flags.h +++ b/arch/x86/kvm/vmx/run_flags.h @@ -3,5 +3,6 @@ #define __KVM_X86_VMX_RUN_FLAGS_H #define VMX_RUN_VMRESUME (1 << 0) +#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) #endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 95d3a47b157d23..1c1a3d6c2af251 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -33,9 +33,10 @@ /** * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode - * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) + * @vmx: struct vcpu_vmx * * @regs: unsigned long * (to guest registers) - * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH + * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH + * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl * * Returns: * 0 on VM-Exit, 1 on VM-Fail @@ -54,6 +55,12 @@ SYM_FUNC_START(__vmx_vcpu_run) #endif push %_ASM_BX + /* Save @vmx for SPEC_CTRL handling */ + push %_ASM_ARG1 + + /* Save @flags for SPEC_CTRL handling */ + push %_ASM_ARG3 + /* * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and * @regs is needed after VM-Exit to save the guest's register values. @@ -148,25 +155,23 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) mov %r15, VCPU_R15(%_ASM_AX) #endif - /* IMPORTANT: RSB must be stuffed before the first return. */ - FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE - - /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ - xor %eax, %eax + /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */ + xor %ebx, %ebx .Lclear_regs: /* - * Clear all general purpose registers except RSP and RAX to prevent + * Clear all general purpose registers except RSP and RBX to prevent * speculative use of the guest's values, even those that are reloaded * via the stack. In theory, an L1 cache miss when restoring registers * could lead to speculative execution with the guest's values. * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially * free. RSP and RAX are exempt as RSP is restored by hardware during - * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. + * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return + * value. */ + xor %eax, %eax xor %ecx, %ecx xor %edx, %edx - xor %ebx, %ebx xor %ebp, %ebp xor %esi, %esi xor %edi, %edi @@ -184,6 +189,28 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) /* "POP" @regs. */ add $WORD_SIZE, %_ASM_SP + /* + * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before + * the first unbalanced RET after vmexit! + * + * For retpoline, RSB filling is needed to prevent poisoned RSB entries + * and (in some cases) RSB underflow. + * + * eIBRS has its own protection against poisoned RSB, so it doesn't + * need the RSB filling sequence. But it does need to be enabled + * before the first unbalanced RET. + */ + + FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + + pop %_ASM_ARG2 /* @flags */ + pop %_ASM_ARG1 /* @vmx */ + + call vmx_spec_ctrl_restore_host + + /* Put return value in AX */ + mov %_ASM_BX, %_ASM_AX + pop %_ASM_BX #ifdef CONFIG_X86_64 pop %r12 @@ -203,7 +230,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) ud2 .Lvmfail: /* VM-Fail: set return value to 1 */ - mov $1, %eax + mov $1, %_ASM_BX jmp .Lclear_regs SYM_FUNC_END(__vmx_vcpu_run) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index bac08c265ddf63..f668df99666883 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -842,6 +842,14 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) if (vmx->loaded_vmcs->launched) flags |= VMX_RUN_VMRESUME; + /* + * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free + * to change it directly without causing a vmexit. In that case read + * it after vmexit and store it in vmx->spec_ctrl. + */ + if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) + flags |= VMX_RUN_SAVE_SPEC_CTRL; + return flags; } @@ -6664,6 +6672,26 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) } } +void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, + unsigned int flags) +{ + u64 hostval = this_cpu_read(x86_spec_ctrl_current); + + if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) + return; + + if (flags & VMX_RUN_SAVE_SPEC_CTRL) + vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL); + + /* + * If the guest/host SPEC_CTRL values differ, restore the host value. + */ + if (vmx->spec_ctrl != hostval) + native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); + + barrier_nospec(); +} + static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) { switch (to_vmx(vcpu)->exit_reason.basic) { @@ -6799,26 +6827,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) /* The actual VMENTER/EXIT is in the .noinstr.text section. */ vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); - /* - * We do not use IBRS in the kernel. If this vCPU has used the - * SPEC_CTRL MSR it may have left it on; save the value and - * turn it off. This is much more efficient than blindly adding - * it to the atomic save/restore list. Especially as the former - * (Saving guest MSRs on vmexit) doesn't even exist in KVM. - * - * For non-nested case: - * If the L01 MSR bitmap does not intercept the MSR, then we need to - * save it. - * - * For nested case: - * If the L02 MSR bitmap does not intercept the MSR, then we need to - * save it. - */ - if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) - vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - - x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); - /* All fields are clean at this point */ if (static_branch_unlikely(&enable_evmcs)) { current_evmcs->hv_clean_fields |= diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 35476115cbd0b9..a8b8150252bb76 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -383,6 +383,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); +void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags); unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, unsigned int flags); From 4207d7b645b82b9a2c0faffeb9f36999ac040a2a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:14 +0200 Subject: [PATCH 0730/1056] KVM: VMX: Fix IBRS handling after vmexit commit bea7e31a5caccb6fe8ed989c065072354f0ecb52 upstream. For legacy IBRS to work, the IBRS bit needs to be always re-written after vmexit, even if it's already on. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/vmx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f668df99666883..a236104fc7439d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6685,8 +6685,13 @@ void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, /* * If the guest/host SPEC_CTRL values differ, restore the host value. + * + * For legacy IBRS, the IBRS bit always needs to be written after + * transitioning from a less privileged predictor mode, regardless of + * whether the guest/host values differ. */ - if (vmx->spec_ctrl != hostval) + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) || + vmx->spec_ctrl != hostval) native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); barrier_nospec(); From 8d5cff499a6d740c91ff37963907e0e983c37f0f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:15 +0200 Subject: [PATCH 0731/1056] x86/speculation: Fill RSB on vmexit for IBRS commit 9756bba28470722dacb79ffce554336dd1f6a6cd upstream. Prevent RSB underflow/poisoning attacks with RSB. While at it, add a bunch of comments to attempt to document the current state of tribal knowledge about RSB attacks and what exactly is being mitigated. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/cpu/bugs.c | 63 +++++++++++++++++++++++++++--- arch/x86/kvm/vmx/vmenter.S | 6 +-- 3 files changed, 62 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c61654bc13aa4c..47fc8ff98d2b4f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -204,7 +204,7 @@ /* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ -/* FREE! ( 7*32+13) */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index cef4f1fa7b7f91..01f3f5cc17356c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1394,16 +1394,69 @@ static void __init spectre_v2_select_mitigation(void) pr_info("%s\n", spectre_v2_strings[mode]); /* - * If spectre v2 protection has been enabled, unconditionally fill - * RSB during a context switch; this protects against two independent - * issues: + * If Spectre v2 protection has been enabled, fill the RSB during a + * context switch. In general there are two types of RSB attacks + * across context switches, for which the CALLs/RETs may be unbalanced. * - * - RSB underflow (and switch to BTB) on Skylake+ - * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs + * 1) RSB underflow + * + * Some Intel parts have "bottomless RSB". When the RSB is empty, + * speculated return targets may come from the branch predictor, + * which could have a user-poisoned BTB or BHB entry. + * + * AMD has it even worse: *all* returns are speculated from the BTB, + * regardless of the state of the RSB. + * + * When IBRS or eIBRS is enabled, the "user -> kernel" attack + * scenario is mitigated by the IBRS branch prediction isolation + * properties, so the RSB buffer filling wouldn't be necessary to + * protect against this type of attack. + * + * The "user -> user" attack scenario is mitigated by RSB filling. + * + * 2) Poisoned RSB entry + * + * If the 'next' in-kernel return stack is shorter than 'prev', + * 'next' could be tricked into speculating with a user-poisoned RSB + * entry. + * + * The "user -> kernel" attack scenario is mitigated by SMEP and + * eIBRS. + * + * The "user -> user" scenario, also known as SpectreBHB, requires + * RSB clearing. + * + * So to mitigate all cases, unconditionally fill RSB on context + * switches. + * + * FIXME: Is this pointless for retbleed-affected AMD? */ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); + /* + * Similar to context switches, there are two types of RSB attacks + * after vmexit: + * + * 1) RSB underflow + * + * 2) Poisoned RSB entry + * + * When retpoline is enabled, both are mitigated by filling/clearing + * the RSB. + * + * When IBRS is enabled, while #1 would be mitigated by the IBRS branch + * prediction isolation protections, RSB still needs to be cleared + * because of #2. Note that SMEP provides no protection here, unlike + * user-space-poisoned RSB entries. + * + * eIBRS, on the other hand, has RSB-poisoning protections, so it + * doesn't need RSB clearing after vmexit. + */ + if (boot_cpu_has(X86_FEATURE_RETPOLINE) || + boot_cpu_has(X86_FEATURE_KERNEL_IBRS)) + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); + /* * Retpoline protects the kernel, but doesn't protect firmware. IBRS * and Enhanced IBRS protect firmware too, so enable IBRS around diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 1c1a3d6c2af251..857fa0fc49fafd 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -193,15 +193,15 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before * the first unbalanced RET after vmexit! * - * For retpoline, RSB filling is needed to prevent poisoned RSB entries - * and (in some cases) RSB underflow. + * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB + * entries and (in some cases) RSB underflow. * * eIBRS has its own protection against poisoned RSB, so it doesn't * need the RSB filling sequence. But it does need to be enabled * before the first unbalanced RET. */ - FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT pop %_ASM_ARG2 /* @flags */ pop %_ASM_ARG1 /* @vmx */ From d7a5c08b46b66d6fef6bad800ea5a4bfbaefc3f7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jun 2022 14:03:25 +0200 Subject: [PATCH 0732/1056] x86/common: Stamp out the stepping madness commit 7a05bc95ed1c5a59e47aaade9fb4083c27de9e62 upstream. The whole MMIO/RETBLEED enumeration went overboard on steppings. Get rid of all that and simply use ANY. If a future stepping of these models would not be affected, it had better set the relevant ARCH_CAP_$FOO_NO bit in IA32_ARCH_CAPABILITIES. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 37 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 39e9e84e75afa9..4a507f7b9943c0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1122,32 +1122,27 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO), - VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO), + VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | - BIT(7) | BIT(0xB), MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), - VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), - VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), - VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), From 12a13570054fcbf347aaf37824ea43915f9f739c Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Fri, 24 Jun 2022 14:41:21 +0100 Subject: [PATCH 0733/1056] x86/cpu/amd: Enumerate BTC_NO commit 26aae8ccbc1972233afd08fb3f368947c0314265 upstream. BTC_NO indicates that hardware is not susceptible to Branch Type Confusion. Zen3 CPUs don't suffer BTC. Hypervisors are expected to synthesise BTC_NO when it is appropriate given the migration pool, to prevent kernels using heuristics. [ bp: Massage. ] Signed-off-by: Andrew Cooper Signed-off-by: Borislav Petkov [cascardo: no X86_FEATURE_BRS] [cascardo: no X86_FEATURE_CPPC] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/amd.c | 21 +++++++++++++++------ arch/x86/kernel/cpu/common.c | 6 ++++-- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 47fc8ff98d2b4f..27f2ebf3bd2725 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -319,6 +319,7 @@ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c3a23c398680c6..92d8d8d833093a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -914,12 +914,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c) node_reclaim_distance = 32; #endif - /* - * Fix erratum 1076: CPB feature bit not being set in CPUID. - * Always set it, except when running under a hypervisor. - */ - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB)) - set_cpu_cap(c, X86_FEATURE_CPB); + /* Fix up CPUID bits, but only if not virtualised. */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { + + /* Erratum 1076: CPB feature bit not being set in CPUID. */ + if (!cpu_has(c, X86_FEATURE_CPB)) + set_cpu_cap(c, X86_FEATURE_CPB); + + /* + * Zen3 (Fam19 model < 0x10) parts are not susceptible to + * Branch Type Confusion, but predate the allocation of the + * BTC_NO bit. + */ + if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) + set_cpu_cap(c, X86_FEATURE_BTC_NO); + } } static void init_amd(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4a507f7b9943c0..34ac05f0581343 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1249,8 +1249,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !arch_cap_mmio_immune(ia32_cap)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); - if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))) - setup_force_cpu_bug(X86_BUG_RETBLEED); + if (!cpu_has(c, X86_FEATURE_BTC_NO)) { + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + setup_force_cpu_bug(X86_BUG_RETBLEED); + } if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; From cc3011cdbe5f834b0e9873b841d1be02be9a1524 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 27 Jun 2022 22:21:17 +0000 Subject: [PATCH 0734/1056] x86/retbleed: Add fine grained Kconfig knobs commit f43b9876e857c739d407bc56df288b0ebe1a9164 upstream. Do fine-grained Kconfig for all the various retbleed parts. NOTE: if your compiler doesn't support return thunks this will silently 'upgrade' your mitigation to IBPB, you might not like this. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov [cascardo: there is no CONFIG_OBJTOOL] [cascardo: objtool calling and option parsing has changed] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- Makefile | 8 +- arch/x86/Kconfig | 106 ++++++++++++++++++----- arch/x86/entry/calling.h | 4 + arch/x86/include/asm/disabled-features.h | 18 +++- arch/x86/include/asm/linkage.h | 4 +- arch/x86/include/asm/nospec-branch.h | 10 ++- arch/x86/include/asm/static_call.h | 2 +- arch/x86/kernel/alternative.c | 5 ++ arch/x86/kernel/cpu/amd.c | 2 + arch/x86/kernel/cpu/bugs.c | 42 +++++---- arch/x86/kernel/static_call.c | 2 +- arch/x86/kvm/emulate.c | 4 +- arch/x86/lib/retpoline.S | 4 + scripts/Makefile.build | 1 + scripts/link-vmlinux.sh | 2 +- security/Kconfig | 11 --- tools/objtool/builtin-check.c | 3 +- tools/objtool/check.c | 9 +- tools/objtool/include/objtool/builtin.h | 2 +- 19 files changed, 170 insertions(+), 69 deletions(-) diff --git a/Makefile b/Makefile index 64190738e71357..e3864845d64af7 100644 --- a/Makefile +++ b/Makefile @@ -687,14 +687,18 @@ endif ifdef CONFIG_CC_IS_GCC RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) -RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) endif ifdef CONFIG_CC_IS_CLANG RETPOLINE_CFLAGS := -mretpoline-external-thunk RETPOLINE_VDSO_CFLAGS := -mretpoline -RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) endif + +ifdef CONFIG_RETHUNK +RETHUNK_CFLAGS := -mfunction-return=thunk-extern +RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS) +endif + export RETPOLINE_CFLAGS export RETPOLINE_VDSO_CFLAGS diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index babe46b4b85f6f..a170cfdae2a7aa 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -459,30 +459,6 @@ config GOLDFISH def_bool y depends on X86_GOLDFISH -config RETPOLINE - bool "Avoid speculative indirect branches in kernel" - default y - help - Compile kernel with the retpoline compiler options to guard against - kernel-to-user data leaks by avoiding speculative indirect - branches. Requires a compiler with -mindirect-branch=thunk-extern - support for full protection. The kernel may run slower. - -config CC_HAS_SLS - def_bool $(cc-option,-mharden-sls=all) - -config CC_HAS_RETURN_THUNK - def_bool $(cc-option,-mfunction-return=thunk-extern) - -config SLS - bool "Mitigate Straight-Line-Speculation" - depends on CC_HAS_SLS && X86_64 - default n - help - Compile the kernel with straight-line-speculation options to guard - against straight line speculation. The kernel image might be slightly - larger. - config X86_CPU_RESCTRL bool "x86 CPU resource control support" depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) @@ -2410,6 +2386,88 @@ source "kernel/livepatch/Kconfig" endmenu +config CC_HAS_SLS + def_bool $(cc-option,-mharden-sls=all) + +config CC_HAS_RETURN_THUNK + def_bool $(cc-option,-mfunction-return=thunk-extern) + +menuconfig SPECULATION_MITIGATIONS + bool "Mitigations for speculative execution vulnerabilities" + default y + help + Say Y here to enable options which enable mitigations for + speculative execution hardware vulnerabilities. + + If you say N, all mitigations will be disabled. You really + should know what you are doing to say so. + +if SPECULATION_MITIGATIONS + +config PAGE_TABLE_ISOLATION + bool "Remove the kernel mapping in user mode" + default y + depends on (X86_64 || X86_PAE) + help + This feature reduces the number of hardware side channels by + ensuring that the majority of kernel addresses are not mapped + into userspace. + + See Documentation/x86/pti.rst for more details. + +config RETPOLINE + bool "Avoid speculative indirect branches in kernel" + default y + help + Compile kernel with the retpoline compiler options to guard against + kernel-to-user data leaks by avoiding speculative indirect + branches. Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + +config RETHUNK + bool "Enable return-thunks" + depends on RETPOLINE && CC_HAS_RETURN_THUNK + default y + help + Compile the kernel with the return-thunks compiler option to guard + against kernel-to-user data leaks by avoiding return speculation. + Requires a compiler with -mfunction-return=thunk-extern + support for full protection. The kernel may run slower. + +config CPU_UNRET_ENTRY + bool "Enable UNRET on kernel entry" + depends on CPU_SUP_AMD && RETHUNK + default y + help + Compile the kernel with support for the retbleed=unret mitigation. + +config CPU_IBPB_ENTRY + bool "Enable IBPB on kernel entry" + depends on CPU_SUP_AMD + default y + help + Compile the kernel with support for the retbleed=ibpb mitigation. + +config CPU_IBRS_ENTRY + bool "Enable IBRS on kernel entry" + depends on CPU_SUP_INTEL + default y + help + Compile the kernel with support for the spectre_v2=ibrs mitigation. + This mitigates both spectre_v2 and retbleed at great cost to + performance. + +config SLS + bool "Mitigate Straight-Line-Speculation" + depends on CC_HAS_SLS && X86_64 + default n + help + Compile the kernel with straight-line-speculation options to guard + against straight line speculation. The kernel image might be slightly + larger. + +endif + config ARCH_HAS_ADD_PAGES def_bool y depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 99d2410c18d1c2..b00a3a95fbfabe 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -296,6 +296,7 @@ For 32-bit we have the following conventions - kernel is built with * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. */ .macro IBRS_ENTER save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS movl $MSR_IA32_SPEC_CTRL, %ecx @@ -316,6 +317,7 @@ For 32-bit we have the following conventions - kernel is built with shr $32, %rdx wrmsr .Lend_\@: +#endif .endm /* @@ -323,6 +325,7 @@ For 32-bit we have the following conventions - kernel is built with * regs. Must be called after the last RET. */ .macro IBRS_EXIT save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS movl $MSR_IA32_SPEC_CTRL, %ecx @@ -337,6 +340,7 @@ For 32-bit we have the following conventions - kernel is built with shr $32, %rdx wrmsr .Lend_\@: +#endif .endm /* diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 1905ac5cd39628..834a3b6d81e121 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -60,9 +60,19 @@ # define DISABLE_RETPOLINE 0 #else # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ - (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ - (1 << (X86_FEATURE_RETHUNK & 31)) | \ - (1 << (X86_FEATURE_UNRET & 31))) + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) +#endif + +#ifdef CONFIG_RETHUNK +# define DISABLE_RETHUNK 0 +#else +# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) +#endif + +#ifdef CONFIG_CPU_UNRET_ENTRY +# define DISABLE_UNRET 0 +#else +# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) #endif /* Force disable because it's broken beyond repair */ @@ -88,7 +98,7 @@ #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX) #define DISABLED_MASK10 0 -#define DISABLED_MASK11 (DISABLE_RETPOLINE) +#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) #define DISABLED_MASK12 0 #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index d04e61c2f863c5..5000cf59bdf5b6 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -18,7 +18,7 @@ #define __ALIGN_STR __stringify(__ALIGN) #endif -#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define RET jmp __x86_return_thunk #else /* CONFIG_RETPOLINE */ #ifdef CONFIG_SLS @@ -30,7 +30,7 @@ #else /* __ASSEMBLY__ */ -#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define ASM_RET "jmp __x86_return_thunk\n\t" #else /* CONFIG_RETPOLINE */ #ifdef CONFIG_SLS diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index a27d9880e4cd11..b73b87006d37aa 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -127,6 +127,12 @@ .Lskip_rsb_\@: .endm +#ifdef CONFIG_CPU_UNRET_ENTRY +#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret" +#else +#define CALL_ZEN_UNTRAIN_RET "" +#endif + /* * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the * return thunk isn't mapped into the userspace tables (then again, AMD @@ -139,10 +145,10 @@ * where we have a stack but before any RET instruction. */ .macro UNTRAIN_RET -#ifdef CONFIG_RETPOLINE +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) ANNOTATE_UNRET_END ALTERNATIVE_2 "", \ - "call zen_untrain_ret", X86_FEATURE_UNRET, \ + CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif .endm diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 7efce22d635523..491aadfac61178 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -44,7 +44,7 @@ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_RETHUNK #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") #else diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index db475542d78b44..8ed9ccf53b6239 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -507,6 +507,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) } } +#ifdef CONFIG_RETHUNK /* * Rewrite the compiler generated return thunk tail-calls. * @@ -568,6 +569,10 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) } } } +#else +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } +#endif /* CONFIG_RETHUNK */ + #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 92d8d8d833093a..8b1bf1c14fc355 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -888,6 +888,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) void init_spectral_chicken(struct cpuinfo_x86 *c) { +#ifdef CONFIG_CPU_UNRET_ENTRY u64 value; /* @@ -904,6 +905,7 @@ void init_spectral_chicken(struct cpuinfo_x86 *c) wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); } } +#endif } static void init_amd_zn(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 01f3f5cc17356c..95219eb8e45cc6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -835,7 +835,6 @@ static int __init retbleed_parse_cmdline(char *str) early_param("retbleed", retbleed_parse_cmdline); #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" -#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to IBPB!\n" #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" static void __init retbleed_select_mitigation(void) @@ -850,18 +849,33 @@ static void __init retbleed_select_mitigation(void) return; case RETBLEED_CMD_UNRET: - retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) { + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + } else { + pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n"); + goto do_cmd_auto; + } break; case RETBLEED_CMD_IBPB: - retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } else { + pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); + goto do_cmd_auto; + } break; +do_cmd_auto: case RETBLEED_CMD_AUTO: default: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { + if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } /* * The Intel mitigation (IBRS or eIBRS) was already selected in @@ -874,14 +888,6 @@ static void __init retbleed_select_mitigation(void) switch (retbleed_mitigation) { case RETBLEED_MITIGATION_UNRET: - - if (!IS_ENABLED(CONFIG_RETPOLINE) || - !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) { - pr_err(RETBLEED_COMPILER_MSG); - retbleed_mitigation = RETBLEED_MITIGATION_IBPB; - goto retbleed_force_ibpb; - } - setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_UNRET); @@ -893,7 +899,6 @@ static void __init retbleed_select_mitigation(void) break; case RETBLEED_MITIGATION_IBPB: -retbleed_force_ibpb: setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); mitigate_smt = true; break; @@ -1264,6 +1269,12 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } + if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) { + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { pr_err("%s selected but not Intel CPU. Switching to AUTO select\n", mitigation_options[i].option); @@ -1321,7 +1332,8 @@ static void __init spectre_v2_select_mitigation(void) break; } - if (boot_cpu_has_bug(X86_BUG_RETBLEED) && + if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && + boot_cpu_has_bug(X86_BUG_RETBLEED) && retbleed_cmd != RETBLEED_CMD_OFF && boot_cpu_has(X86_FEATURE_IBRS) && boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 2425bea07cdc27..6b0c6c22880a12 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -120,7 +120,7 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) } EXPORT_SYMBOL_GPL(arch_static_call_transform); -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_RETHUNK /* * This is called by apply_returns() to fix up static call trampolines, * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d7c4ba7217f679..33feb282886670 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -434,10 +434,10 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); * Depending on .config the SETcc functions look like: * * SETcc %al [3 bytes] - * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETPOLINE] + * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] * INT3 [1 byte; CONFIG_SLS] */ -#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \ +#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ IS_ENABLED(CONFIG_SLS)) #define SETCC_LENGTH (3 + RET_LENGTH) #define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 807f674fd59e5c..1221bb099afb4d 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -71,6 +71,8 @@ SYM_CODE_END(__x86_indirect_thunk_array) * This function name is magical and is used by -mfunction-return=thunk-extern * for the compiler to generate JMPs to it. */ +#ifdef CONFIG_RETHUNK + .section .text.__x86.return_thunk /* @@ -135,3 +137,5 @@ SYM_FUNC_END(zen_untrain_ret) __EXPORT_THUNK(zen_untrain_ret) EXPORT_SYMBOL(__x86_return_thunk) + +#endif /* CONFIG_RETHUNK */ diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 2b988b6ccacb66..17aa8ef2d52a76 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -231,6 +231,7 @@ objtool_args = \ $(if $(CONFIG_FRAME_POINTER),, --no-fp) \ $(if $(CONFIG_GCOV_KERNEL)$(CONFIG_LTO_CLANG), --no-unreachable)\ $(if $(CONFIG_RETPOLINE), --retpoline) \ + $(if $(CONFIG_RETHUNK), --rethunk) \ $(if $(CONFIG_X86_SMAP), --uaccess) \ $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \ $(if $(CONFIG_SLS), --sls) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 3c2adb7563f1b5..3819a461465d8e 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -120,7 +120,7 @@ objtool_link() if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then objtoolopt="${objtoolopt} --noinstr" - if is_enabled CONFIG_RETPOLINE; then + if is_enabled CONFIG_CPU_UNRET_ENTRY; then objtoolopt="${objtoolopt} --unret" fi fi diff --git a/security/Kconfig b/security/Kconfig index fe6c0395fa0251..5d412b3ddc496b 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -54,17 +54,6 @@ config SECURITY_NETWORK implement socket and networking access controls. If you are unsure how to answer this question, answer N. -config PAGE_TABLE_ISOLATION - bool "Remove the kernel mapping in user mode" - default y - depends on (X86_64 || X86_PAE) && !UML - help - This feature reduces the number of hardware side channels by - ensuring that the majority of kernel addresses are not mapped - into userspace. - - See Documentation/x86/pti.rst for more details. - config SECURITY_INFINIBAND bool "Infiniband Security Hooks" depends on SECURITY && INFINIBAND diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index ea14dfcad60abd..35081fe3732031 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -20,7 +20,7 @@ #include bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup, sls, unret; + validate_dup, vmlinux, mcount, noinstr, backup, sls, unret, rethunk; static const char * const check_usage[] = { "objtool check [] file.o", @@ -36,6 +36,7 @@ const struct option check_options[] = { OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), + OPT_BOOLEAN(0, "rethunk", &rethunk, "validate and annotate rethunk usage"), OPT_BOOLEAN(0, "unret", &unret, "validate entry unret placement"), OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4c18a7e283445d..72e5d23f1ad882 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3440,8 +3440,11 @@ static int validate_retpoline(struct objtool_file *file) continue; if (insn->type == INSN_RETURN) { - WARN_FUNC("'naked' return found in RETPOLINE build", - insn->sec, insn->offset); + if (rethunk) { + WARN_FUNC("'naked' return found in RETHUNK build", + insn->sec, insn->offset); + } else + continue; } else { WARN_FUNC("indirect %s found in RETPOLINE build", insn->sec, insn->offset, @@ -3711,7 +3714,9 @@ int check(struct objtool_file *file) if (ret < 0) goto out; warnings += ret; + } + if (rethunk) { ret = create_return_sites_sections(file); if (ret < 0) goto out; diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 1758a75f565b51..66ad30ec581825 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -9,7 +9,7 @@ extern const struct option check_options[]; extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup, sls, unret; + validate_dup, vmlinux, mcount, noinstr, backup, sls, unret, rethunk; extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); From 2f8967e22322251f3c4aa74f6ee30c267d5f91bb Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 6 Jul 2022 15:01:15 -0700 Subject: [PATCH 0735/1056] x86/bugs: Add Cannon lake to RETBleed affected CPU list commit f54d45372c6ac9c993451de5e51312485f7d10bc upstream. Cannon lake is also affected by RETBleed, add it to the list. Fixes: 6ad0ad2bf8a6 ("x86/bugs: Report Intel retbleed vulnerability") Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 34ac05f0581343..80cc41f7978309 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1132,6 +1132,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), From 925340f99bdfd747bcb6950d5576224fd75dc103 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Jul 2022 15:33:30 +0200 Subject: [PATCH 0736/1056] x86/entry: Move PUSH_AND_CLEAR_REGS() back into error_entry commit 2c08b9b38f5b0f4a6c2d29be22b695e4ec4a556b upstream. Commit ee774dac0da1 ("x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()") moved PUSH_AND_CLEAR_REGS out of error_entry, into its own function, in part to avoid calling error_entry() for XenPV. However, commit 7c81c0c9210c ("x86/entry: Avoid very early RET") had to change that because the 'ret' was too early and moved it into idtentry, bloating the text size, since idtentry is expanded for every exception vector. However, with the advent of xen_error_entry() in commit d147553b64bad ("x86/xen: Add UNTRAIN_RET") it became possible to remove PUSH_AND_CLEAR_REGS from idtentry, back into *error_entry(). Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov [cascardo: error_entry still does cld] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 08b047b023ee47..763ff243aeca66 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -320,6 +320,8 @@ SYM_CODE_END(ret_from_fork) SYM_CODE_START_LOCAL(xen_error_entry) UNWIND_HINT_FUNC + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 UNTRAIN_RET RET SYM_CODE_END(xen_error_entry) @@ -331,9 +333,6 @@ SYM_CODE_END(xen_error_entry) */ .macro idtentry_body cfunc has_error_code:req - PUSH_AND_CLEAR_REGS - ENCODE_FRAME_POINTER - /* * Call error_entry() and switch to the task stack if from userspace. * @@ -1014,6 +1013,10 @@ SYM_CODE_END(paranoid_exit) SYM_CODE_START_LOCAL(error_entry) UNWIND_HINT_FUNC cld + + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 + testb $3, CS+8(%rsp) jz .Lerror_kernelspace From 3653093b7bbfa8c8c61858bffdc7f1e41ad436e1 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 7 Jul 2022 13:41:52 -0300 Subject: [PATCH 0737/1056] x86/bugs: Do not enable IBPB-on-entry when IBPB is not supported commit 2259da159fbe5dba8ac00b560cf00b6a6537fa18 upstream. There are some VM configurations which have Skylake model but do not support IBPB. In those cases, when using retbleed=ibpb, userspace is going to be killed and kernel is going to panic. If the CPU does not support IBPB, warn and proceed with the auto option. Also, do not fallback to IBPB on AMD/Hygon systems if it is not supported. Fixes: 3ebc17006888 ("x86/bugs: Add retbleed=ibpb") Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 95219eb8e45cc6..921d718e7f028c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -858,7 +858,10 @@ static void __init retbleed_select_mitigation(void) break; case RETBLEED_CMD_IBPB: - if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + if (!boot_cpu_has(X86_FEATURE_IBPB)) { + pr_err("WARNING: CPU does not support IBPB.\n"); + goto do_cmd_auto; + } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { retbleed_mitigation = RETBLEED_MITIGATION_IBPB; } else { pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); @@ -873,7 +876,7 @@ static void __init retbleed_select_mitigation(void) boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) retbleed_mitigation = RETBLEED_MITIGATION_UNRET; - else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) + else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB)) retbleed_mitigation = RETBLEED_MITIGATION_IBPB; } From db0128b8243fe6f1004b07a622ab8f3bc7260be8 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 8 Jul 2022 19:10:11 +0200 Subject: [PATCH 0738/1056] x86/kexec: Disable RET on kexec commit 697977d8415d61f3acbc4ee6d564c9dcf0309507 upstream. All the invocations unroll to __x86_return_thunk and this file must be PIC independent. This fixes kexec on 64-bit AMD boxes. [ bp: Fix 32-bit build. ] Reported-by: Edward Tran Reported-by: Awais Tanveer Suggested-by: Ankur Arora Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Alexandre Chartre Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/relocate_kernel_32.S | 25 +++++++++++++++++++------ arch/x86/kernel/relocate_kernel_64.S | 23 +++++++++++++++++------ 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index fcc8a7699103a4..c7c4b1917336d1 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -7,10 +7,12 @@ #include #include #include +#include #include /* - * Must be relocatable PIC code callable as a C function + * Must be relocatable PIC code callable as a C function, in particular + * there must be a plain RET and not jump to return thunk. */ #define PTR(x) (x << 2) @@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) movl %edi, %eax addl $(identity_mapped - relocate_kernel), %eax pushl %eax - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) @@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) xorl %edx, %edx xorl %esi, %esi xorl %ebp, %ebp - RET + ANNOTATE_UNRET_SAFE + ret + int3 1: popl %edx movl CP_PA_SWAP_PAGE(%edi), %esp addl $PAGE_SIZE, %esp 2: + ANNOTATE_RETPOLINE_SAFE call *%edx /* get the re-entry point of the peer system */ @@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) movl %edi, %eax addl $(virtual_mapped - relocate_kernel), %eax pushl %eax - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) @@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) popl %edi popl %esi popl %ebx - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(virtual_mapped) /* Do the copies */ @@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) popl %edi popl %ebx popl %ebp - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(swap_pages) .globl kexec_control_code_size diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 5019091af059ec..8a9cea950e3986 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -13,7 +13,8 @@ #include /* - * Must be relocatable PIC code callable as a C function + * Must be relocatable PIC code callable as a C function, in particular + * there must be a plain RET and not jump to return thunk. */ #define PTR(x) (x << 3) @@ -104,7 +105,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) /* jump to identity mapped page */ addq $(identity_mapped - relocate_kernel), %r8 pushq %r8 - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) @@ -191,7 +194,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) xorl %r14d, %r14d xorl %r15d, %r15d - RET + ANNOTATE_UNRET_SAFE + ret + int3 1: popq %rdx @@ -210,7 +215,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) call swap_pages movq $virtual_mapped, %rax pushq %rax - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) @@ -231,7 +238,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) popq %r12 popq %rbp popq %rbx - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(virtual_mapped) /* Do the copies */ @@ -288,7 +297,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) lea PAGE_SIZE(%rax), %rsi jmp 0b 3: - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(swap_pages) .globl kexec_control_code_size From 73ad137d1146eabd8c82496a47477006dce3b91d Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 8 Jul 2022 13:36:09 -0700 Subject: [PATCH 0739/1056] x86/speculation: Disable RRSBA behavior commit 4ad3278df6fe2b0852b00d5757fc2ccd8e92c26e upstream. Some Intel processors may use alternate predictors for RETs on RSB-underflow. This condition may be vulnerable to Branch History Injection (BHI) and intramode-BTI. Kernel earlier added spectre_v2 mitigation modes (eIBRS+Retpolines, eIBRS+LFENCE, Retpolines) which protect indirect CALLs and JMPs against such attacks. However, on RSB-underflow, RET target prediction may fallback to alternate predictors. As a result, RET's predicted target may get influenced by branch history. A new MSR_IA32_SPEC_CTRL bit (RRSBA_DIS_S) controls this fallback behavior when in kernel mode. When set, RETs will not take predictions from alternate predictors, hence mitigating RETs as well. Support for this is enumerated by CPUID.7.2.EDX[RRSBA_CTRL] (bit2). For spectre v2 mitigation, when a user selects a mitigation that protects indirect CALLs and JMPs against BHI and intramode-BTI, set RRSBA_DIS_S also to protect RETs for RSB-underflow case. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov [cascardo: no X86_FEATURE_INTEL_PPIN] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/msr-index.h | 9 +++++++++ arch/x86/kernel/cpu/bugs.c | 26 ++++++++++++++++++++++++++ arch/x86/kernel/cpu/scattered.c | 1 + tools/arch/x86/include/asm/msr-index.h | 9 +++++++++ 5 files changed, 46 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 27f2ebf3bd2725..3781a7f489ef37 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -295,7 +295,7 @@ #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ #define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ -/* FREE! (11*32+11) */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index aecc76b4dfbeb0..ec2967e7249f51 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -51,6 +51,8 @@ #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ +#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -139,6 +141,13 @@ * bit available to control VERW * behavior. */ +#define ARCH_CAP_RRSBA BIT(19) /* + * Indicates RET may use predictors + * other than the RSB. With eIBRS + * enabled predictions in kernel mode + * are restricted to targets in + * kernel. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 921d718e7f028c..0e6b6c9527022b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1311,6 +1311,22 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +/* Disable in-kernel use of non-RSB RET predictors */ +static void __init spec_ctrl_disable_kernel_rrsba(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + return; + + ia32_cap = x86_read_arch_cap_msr(); + + if (ia32_cap & ARCH_CAP_RRSBA) { + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + write_spec_ctrl_current(x86_spec_ctrl_base, true); + } +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -1405,6 +1421,16 @@ static void __init spectre_v2_select_mitigation(void) break; } + /* + * Disable alternate RSB predictions in kernel when indirect CALLs and + * JMPs gets protection against BHI and Intramode-BTI, but RET + * prediction from a non-RSB predictor is still a risk. + */ + if (mode == SPECTRE_V2_EIBRS_LFENCE || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_RETPOLINE) + spec_ctrl_disable_kernel_rrsba(); + spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 21d1f062895a8d..06bfef1c4175e1 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -26,6 +26,7 @@ struct cpuid_bit { static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 7dc5a3306f37a7..3923e6c079faf0 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -51,6 +51,8 @@ #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ +#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -138,6 +140,13 @@ * bit available to control VERW * behavior. */ +#define ARCH_CAP_RRSBA BIT(19) /* + * Indicates RET may use predictors + * other than the RSB. With eIBRS + * enabled predictions in kernel mode + * are restricted to targets in + * kernel. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* From 86ccf19a91beed4c1d1a2c9dc9c7af1f991a7fea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Jul 2022 14:01:06 +0200 Subject: [PATCH 0740/1056] x86/static_call: Serialize __static_call_fixup() properly commit c27c753ea6fd1237f4f96abf8b623d7bab505513 upstream. __static_call_fixup() invokes __static_call_transform() without holding text_mutex, which causes lockdep to complain in text_poke_bp(). Adding the proper locking cures that, but as this is either used during early boot or during module finalizing, it's not required to use text_poke_bp(). Add an argument to __static_call_transform() which tells it to use text_poke_early() for it. Fixes: ee88d363d156 ("x86,static_call: Use alternative RET encoding") Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/static_call.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 6b0c6c22880a12..2fc4f96702e622 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -25,7 +25,8 @@ static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; -static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) +static void __ref __static_call_transform(void *insn, enum insn_type type, + void *func, bool modinit) { const void *emulate = NULL; int size = CALL_INSN_SIZE; @@ -60,7 +61,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void if (memcmp(insn, code, size) == 0) return; - if (unlikely(system_state == SYSTEM_BOOTING)) + if (system_state == SYSTEM_BOOTING || modinit) return text_poke_early(insn, code, size); text_poke_bp(insn, code, size, emulate); @@ -108,12 +109,12 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) if (tramp) { __static_call_validate(tramp, true); - __static_call_transform(tramp, __sc_insn(!func, true), func); + __static_call_transform(tramp, __sc_insn(!func, true), func, false); } if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) { __static_call_validate(site, tail); - __static_call_transform(site, __sc_insn(!func, tail), func); + __static_call_transform(site, __sc_insn(!func, tail), func, false); } mutex_unlock(&text_mutex); @@ -139,8 +140,10 @@ bool __static_call_fixup(void *tramp, u8 op, void *dest) return false; } + mutex_lock(&text_mutex); if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) - __static_call_transform(tramp, RET, NULL); + __static_call_transform(tramp, RET, NULL, true); + mutex_unlock(&text_mutex); return true; } From 96907c5d3927515029ad1f38a31d56dc04b8d288 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 14 Jul 2022 00:39:33 +0200 Subject: [PATCH 0741/1056] x86/xen: Fix initialisation in hypercall_page after rethunk The hypercall_page is special and the RETs there should not be changed into rethunk calls (but can have SLS mitigation). Change the initial instructions to ret + int3 padding, as was done in upstream commit 5b2fc51576ef "x86/ibt,xen: Sprinkle the ENDBR". Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/xen-head.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 38b73e7e54ba7c..2a3ef5fcba34b9 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -69,9 +69,9 @@ SYM_CODE_END(asm_cpu_bringup_and_idle) SYM_CODE_START(hypercall_page) .rept (PAGE_SIZE / 32) UNWIND_HINT_FUNC - .skip 31, 0x90 ANNOTATE_UNRET_SAFE - RET + ret + .skip 31, 0xcc .endr #define HYPERCALL(n) \ From 67040d1d922be2bb58e693032067ffa97aac46ad Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 13 Jul 2022 11:50:46 +0200 Subject: [PATCH 0742/1056] x86/asm/32: Fix ANNOTATE_UNRET_SAFE use on 32-bit commit 3131ef39fb03bbde237d0b8260445898f3dfda5b upstream. The build on x86_32 currently fails after commit 9bb2ec608a20 (objtool: Update Retpoline validation) with: arch/x86/kernel/../../x86/xen/xen-head.S:35: Error: no such instruction: `annotate_unret_safe' ANNOTATE_UNRET_SAFE is defined in nospec-branch.h. And head_32.S is missing this include. Fix this. Fixes: 9bb2ec608a20 ("objtool: Update Retpoline validation") Signed-off-by: Jiri Slaby Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/63e23f80-033f-f64e-7522-2816debbc367@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/head_32.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index eb8656bac99b6b..9b7acc9c7874c1 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include From d0caa861a836fe4d85749e8a8d16623bb91b39d3 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 13 Jul 2022 08:24:37 -0700 Subject: [PATCH 0743/1056] x86/speculation: Use DECLARE_PER_CPU for x86_spec_ctrl_current commit db886979683a8360ced9b24ab1125ad0c4d2cf76 upstream. Clang warns: arch/x86/kernel/cpu/bugs.c:58:21: error: section attribute is specified on redeclared variable [-Werror,-Wsection] DEFINE_PER_CPU(u64, x86_spec_ctrl_current); ^ arch/x86/include/asm/nospec-branch.h:283:12: note: previous declaration is here extern u64 x86_spec_ctrl_current; ^ 1 error generated. The declaration should be using DECLARE_PER_CPU instead so all attributes stay in sync. Cc: stable@vger.kernel.org Fixes: fc02735b14ff ("KVM: VMX: Prevent guest RSB poisoning attacks with eIBRS") Reported-by: kernel test robot Signed-off-by: Nathan Chancellor Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index b73b87006d37aa..0a87b2bc4ef945 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -11,6 +11,7 @@ #include #include #include +#include #define RETPOLINE_THUNK_SIZE 32 @@ -281,7 +282,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; -extern u64 x86_spec_ctrl_current; +DECLARE_PER_CPU(u64, x86_spec_ctrl_current); extern void write_spec_ctrl_current(u64 val, bool force); extern u64 spec_ctrl_current(void); From 554cf28b104d6f4a215304bbaae4bf7713b0ea0d Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 15 Jul 2022 16:45:50 -0300 Subject: [PATCH 0744/1056] efi/x86: use naked RET on mixed mode call wrapper commit 51a6fa0732d6be6a44e0032752ad2ac10d67c796 upstream. When running with return thunks enabled under 32-bit EFI, the system crashes with: kernel tried to execute NX-protected page - exploit attempt? (uid: 0) BUG: unable to handle page fault for address: 000000005bc02900 #PF: supervisor instruction fetch in kernel mode #PF: error_code(0x0011) - permissions violation PGD 18f7063 P4D 18f7063 PUD 18ff063 PMD 190e063 PTE 800000005bc02063 Oops: 0011 [#1] PREEMPT SMP PTI CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.19.0-rc6+ #166 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:0x5bc02900 Code: Unable to access opcode bytes at RIP 0x5bc028d6. RSP: 0018:ffffffffb3203e10 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000048 RDX: 000000000190dfac RSI: 0000000000001710 RDI: 000000007eae823b RBP: ffffffffb3203e70 R08: 0000000001970000 R09: ffffffffb3203e28 R10: 747563657865206c R11: 6c6977203a696665 R12: 0000000000001710 R13: 0000000000000030 R14: 0000000001970000 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff8e013ca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 0000000080050033 CR2: 000000005bc02900 CR3: 0000000001930000 CR4: 00000000000006f0 Call Trace: ? efi_set_virtual_address_map+0x9c/0x175 efi_enter_virtual_mode+0x4a6/0x53e start_kernel+0x67c/0x71e x86_64_start_reservations+0x24/0x2a x86_64_start_kernel+0xe9/0xf4 secondary_startup_64_no_verify+0xe5/0xeb That's because it cannot jump to the return thunk from the 32-bit code. Using a naked RET and marking it as safe allows the system to proceed booting. Fixes: aa3d480315ba ("x86: Use return-thunk in asm code") Reported-by: Guenter Roeck Signed-off-by: Thadeu Lima de Souza Cascardo Cc: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Tested-by: Guenter Roeck Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/platform/efi/efi_thunk_64.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index f2a8eec69f8f64..a7ffe30e861437 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -22,6 +22,7 @@ #include #include #include +#include .text .code64 @@ -63,7 +64,9 @@ SYM_CODE_START(__efi64_thunk) 1: movq 24(%rsp), %rsp pop %rbx pop %rbp - RET + ANNOTATE_UNRET_SAFE + ret + int3 .code32 2: pushl $__KERNEL_CS From 198685e58b134679e6079d872396f4352780282f Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 13 Jul 2022 14:12:41 -0300 Subject: [PATCH 0745/1056] x86/kvm: fix FASTOP_SIZE when return thunks are enabled commit 84e7051c0bc1f2a13101553959b3a9d9a8e24939 upstream. The return thunk call makes the fastop functions larger, just like IBT does. Consider a 16-byte FASTOP_SIZE when CONFIG_RETHUNK is enabled. Otherwise, functions will be incorrectly aligned and when computing their position for differently sized operators, they will executed in the middle or end of a function, which may as well be an int3, leading to a crash like: [ 36.091116] int3: 0000 [#1] SMP NOPTI [ 36.091119] CPU: 3 PID: 1371 Comm: qemu-system-x86 Not tainted 5.15.0-41-generic #44 [ 36.091120] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 [ 36.091121] RIP: 0010:xaddw_ax_dx+0x9/0x10 [kvm] [ 36.091185] Code: 00 0f bb d0 c3 cc cc cc cc 48 0f bb d0 c3 cc cc cc cc 0f 1f 80 00 00 00 00 0f c0 d0 c3 cc cc cc cc 66 0f c1 d0 c3 cc cc cc cc <0f> 1f 80 00 00 00 00 0f c1 d0 c3 cc cc cc cc 48 0f c1 d0 c3 cc cc [ 36.091186] RSP: 0018:ffffb1f541143c98 EFLAGS: 00000202 [ 36.091188] RAX: 0000000089abcdef RBX: 0000000000000001 RCX: 0000000000000000 [ 36.091188] RDX: 0000000076543210 RSI: ffffffffc073c6d0 RDI: 0000000000000200 [ 36.091189] RBP: ffffb1f541143ca0 R08: ffff9f1803350a70 R09: 0000000000000002 [ 36.091190] R10: ffff9f1803350a70 R11: 0000000000000000 R12: ffff9f1803350a70 [ 36.091190] R13: ffffffffc077fee0 R14: 0000000000000000 R15: 0000000000000000 [ 36.091191] FS: 00007efdfce8d640(0000) GS:ffff9f187dd80000(0000) knlGS:0000000000000000 [ 36.091192] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 36.091192] CR2: 0000000000000000 CR3: 0000000009b62002 CR4: 0000000000772ee0 [ 36.091195] PKRU: 55555554 [ 36.091195] Call Trace: [ 36.091197] [ 36.091198] ? fastop+0x5a/0xa0 [kvm] [ 36.091222] x86_emulate_insn+0x7b8/0xe90 [kvm] [ 36.091244] x86_emulate_instruction+0x2f4/0x630 [kvm] [ 36.091263] ? kvm_arch_vcpu_load+0x7c/0x230 [kvm] [ 36.091283] ? vmx_prepare_switch_to_host+0xf7/0x190 [kvm_intel] [ 36.091290] complete_emulated_mmio+0x297/0x320 [kvm] [ 36.091310] kvm_arch_vcpu_ioctl_run+0x32f/0x550 [kvm] [ 36.091330] kvm_vcpu_ioctl+0x29e/0x6d0 [kvm] [ 36.091344] ? kvm_vcpu_ioctl+0x120/0x6d0 [kvm] [ 36.091357] ? __fget_files+0x86/0xc0 [ 36.091362] ? __fget_files+0x86/0xc0 [ 36.091363] __x64_sys_ioctl+0x92/0xd0 [ 36.091366] do_syscall_64+0x59/0xc0 [ 36.091369] ? syscall_exit_to_user_mode+0x27/0x50 [ 36.091370] ? do_syscall_64+0x69/0xc0 [ 36.091371] ? syscall_exit_to_user_mode+0x27/0x50 [ 36.091372] ? __x64_sys_writev+0x1c/0x30 [ 36.091374] ? do_syscall_64+0x69/0xc0 [ 36.091374] ? exit_to_user_mode_prepare+0x37/0xb0 [ 36.091378] ? syscall_exit_to_user_mode+0x27/0x50 [ 36.091379] ? do_syscall_64+0x69/0xc0 [ 36.091379] ? do_syscall_64+0x69/0xc0 [ 36.091380] ? do_syscall_64+0x69/0xc0 [ 36.091381] ? do_syscall_64+0x69/0xc0 [ 36.091381] entry_SYSCALL_64_after_hwframe+0x61/0xcb [ 36.091384] RIP: 0033:0x7efdfe6d1aff [ 36.091390] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 77 1f 48 8b 44 24 18 64 48 2b 04 25 28 00 [ 36.091391] RSP: 002b:00007efdfce8c460 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 36.091393] RAX: ffffffffffffffda RBX: 000000000000ae80 RCX: 00007efdfe6d1aff [ 36.091393] RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 000000000000000c [ 36.091394] RBP: 0000558f1609e220 R08: 0000558f13fb8190 R09: 00000000ffffffff [ 36.091394] R10: 0000558f16b5e950 R11: 0000000000000246 R12: 0000000000000000 [ 36.091394] R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000 [ 36.091396] [ 36.091397] Modules linked in: isofs nls_iso8859_1 kvm_intel joydev kvm input_leds serio_raw sch_fq_codel dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua ipmi_devintf ipmi_msghandler drm msr ip_tables x_tables autofs4 btrfs blake2b_generic zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel virtio_net net_failover crypto_simd ahci xhci_pci cryptd psmouse virtio_blk libahci xhci_pci_renesas failover [ 36.123271] ---[ end trace db3c0ab5a48fabcc ]--- [ 36.123272] RIP: 0010:xaddw_ax_dx+0x9/0x10 [kvm] [ 36.123319] Code: 00 0f bb d0 c3 cc cc cc cc 48 0f bb d0 c3 cc cc cc cc 0f 1f 80 00 00 00 00 0f c0 d0 c3 cc cc cc cc 66 0f c1 d0 c3 cc cc cc cc <0f> 1f 80 00 00 00 00 0f c1 d0 c3 cc cc cc cc 48 0f c1 d0 c3 cc cc [ 36.123320] RSP: 0018:ffffb1f541143c98 EFLAGS: 00000202 [ 36.123321] RAX: 0000000089abcdef RBX: 0000000000000001 RCX: 0000000000000000 [ 36.123321] RDX: 0000000076543210 RSI: ffffffffc073c6d0 RDI: 0000000000000200 [ 36.123322] RBP: ffffb1f541143ca0 R08: ffff9f1803350a70 R09: 0000000000000002 [ 36.123322] R10: ffff9f1803350a70 R11: 0000000000000000 R12: ffff9f1803350a70 [ 36.123323] R13: ffffffffc077fee0 R14: 0000000000000000 R15: 0000000000000000 [ 36.123323] FS: 00007efdfce8d640(0000) GS:ffff9f187dd80000(0000) knlGS:0000000000000000 [ 36.123324] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 36.123325] CR2: 0000000000000000 CR3: 0000000009b62002 CR4: 0000000000772ee0 [ 36.123327] PKRU: 55555554 [ 36.123328] Kernel panic - not syncing: Fatal exception in interrupt [ 36.123410] Kernel Offset: 0x1400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 36.135305] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Fixes: aa3d480315ba ("x86: Use return-thunk in asm code") Signed-off-by: Thadeu Lima de Souza Cascardo Co-developed-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Paolo Bonzini Reported-by: Linux Kernel Functional Testing Message-Id: <20220713171241.184026-1-cascardo@canonical.com> Tested-by: Jack Wang Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 33feb282886670..b3d30f5911133a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -187,8 +187,12 @@ #define X8(x...) X4(x), X4(x) #define X16(x...) X8(x), X8(x) -#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) -#define FASTOP_SIZE 8 +#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) +#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ + IS_ENABLED(CONFIG_SLS)) +#define FASTOP_LENGTH (ENDBR_INSN_SIZE + 7 + RET_LENGTH) +#define FASTOP_SIZE (8 << ((FASTOP_LENGTH > 8) & 1) << ((FASTOP_LENGTH > 16) & 1)) +static_assert(FASTOP_LENGTH <= FASTOP_SIZE); struct opcode { u64 flags : 56; @@ -437,8 +441,6 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] * INT3 [1 byte; CONFIG_SLS] */ -#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ - IS_ENABLED(CONFIG_SLS)) #define SETCC_LENGTH (3 + RET_LENGTH) #define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) static_assert(SETCC_LENGTH <= SETCC_ALIGN); From 5f4e77cc9abf65c4593cd255d8c3cd6c5d0ca7f6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 15 Jul 2022 07:34:55 -0400 Subject: [PATCH 0746/1056] KVM: emulate: do not adjust size of fastop and setcc subroutines commit 79629181607e801c0b41b8790ac4ee2eb5d7bc3e upstream. Instead of doing complicated calculations to find the size of the subroutines (which are even more complicated because they need to be stringified into an asm statement), just hardcode to 16. It is less dense for a few combinations of IBT/SLS/retbleed, but it has the advantage of being really simple. Cc: stable@vger.kernel.org # 5.15.x: 84e7051c0bc1: x86/kvm: fix FASTOP_SIZE when return thunks are enabled Cc: stable@vger.kernel.org Suggested-by: Linus Torvalds Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b3d30f5911133a..318a78379ca6c0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -187,13 +187,6 @@ #define X8(x...) X4(x), X4(x) #define X16(x...) X8(x), X8(x) -#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) -#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ - IS_ENABLED(CONFIG_SLS)) -#define FASTOP_LENGTH (ENDBR_INSN_SIZE + 7 + RET_LENGTH) -#define FASTOP_SIZE (8 << ((FASTOP_LENGTH > 8) & 1) << ((FASTOP_LENGTH > 16) & 1)) -static_assert(FASTOP_LENGTH <= FASTOP_SIZE); - struct opcode { u64 flags : 56; u64 intercept : 8; @@ -307,9 +300,15 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for * different operand sizes can be reached by calculation, rather than a jump * table (which would be bigger than the code). + * + * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR + * and 1 for the straight line speculation INT3, leaves 7 bytes for the + * body of the function. Currently none is larger than 4. */ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); +#define FASTOP_SIZE 16 + #define __FOP_FUNC(name) \ ".align " __stringify(FASTOP_SIZE) " \n\t" \ ".type " name ", @function \n\t" \ @@ -441,9 +440,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] * INT3 [1 byte; CONFIG_SLS] */ -#define SETCC_LENGTH (3 + RET_LENGTH) -#define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) -static_assert(SETCC_LENGTH <= SETCC_ALIGN); +#define SETCC_ALIGN 16 #define FOP_SETCC(op) \ ".align " __stringify(SETCC_ALIGN) " \n\t" \ From 1dd6c13b437c0d7c997c16fcb73afdf67e625c33 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Jul 2021 13:32:18 -0300 Subject: [PATCH 0747/1056] tools arch x86: Sync the msr-index.h copy with the kernel sources commit 91d248c3b903b46a58cbc7e8d38d684d3e4007c2 upstream. To pick up the changes from these csets: 4ad3278df6fe2b08 ("x86/speculation: Disable RRSBA behavior") d7caac991feeef1b ("x86/cpu/amd: Add Spectral Chicken") That cause no changes to tooling: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after $ Just silences this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h' diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Cc: Adrian Hunter Cc: Borislav Petkov Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Pawan Gupta Cc: Peter Zijlstra Link: https://lore.kernel.org/lkml/YtQTm9wsB3hxQWvy@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/arch/x86/include/asm/msr-index.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 3923e6c079faf0..ec2967e7249f51 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -93,6 +93,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ #define ARCH_CAP_SSB_NO BIT(4) /* * Not susceptible to Speculative Store Bypass @@ -523,6 +524,9 @@ /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 +#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 +#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 From ac8edadc2b1e96caca7270dc14640c4e424f4280 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Jul 2021 13:39:15 -0300 Subject: [PATCH 0748/1056] tools headers cpufeatures: Sync with the kernel sources commit f098addbdb44c8a565367f5162f3ab170ed9404a upstream. To pick the changes from: f43b9876e857c739 ("x86/retbleed: Add fine grained Kconfig knobs") a149180fbcf336e9 ("x86: Add magic AMD return-thunk") 15e67227c49a5783 ("x86: Undo return-thunk damage") 369ae6ffc41a3c11 ("x86/retpoline: Cleanup some #ifdefery") 4ad3278df6fe2b08 x86/speculation: Disable RRSBA behavior 26aae8ccbc197223 x86/cpu/amd: Enumerate BTC_NO 9756bba28470722d x86/speculation: Fill RSB on vmexit for IBRS 3ebc170068885b6f x86/bugs: Add retbleed=ibpb 2dbb887e875b1de3 x86/entry: Add kernel IBRS implementation 6b80b59b35557065 x86/bugs: Report AMD retbleed vulnerability a149180fbcf336e9 x86: Add magic AMD return-thunk 15e67227c49a5783 x86: Undo return-thunk damage a883d624aed463c8 x86/cpufeatures: Move RETPOLINE flags to word 11 51802186158c74a0 x86/speculation/mmio: Enumerate Processor MMIO Stale Data bug This only causes these perf files to be rebuilt: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/disabled-features.h' differs from latest version at 'arch/x86/include/asm/disabled-features.h' diff -u tools/arch/x86/include/asm/disabled-features.h arch/x86/include/asm/disabled-features.h Cc: Adrian Hunter Cc: Borislav Petkov Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Greg Kroah-Hartman --- tools/arch/x86/include/asm/cpufeatures.h | 12 +++++++++-- .../arch/x86/include/asm/disabled-features.h | 21 ++++++++++++++++++- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 96eefc05387a4a..3781a7f489ef37 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -203,8 +203,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ /* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCEs for Spectre variant 2 */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ @@ -294,6 +294,12 @@ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -313,6 +319,7 @@ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -437,5 +444,6 @@ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 8f28fafa98b32e..834a3b6d81e121 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -56,6 +56,25 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif +#ifdef CONFIG_RETPOLINE +# define DISABLE_RETPOLINE 0 +#else +# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) +#endif + +#ifdef CONFIG_RETHUNK +# define DISABLE_RETHUNK 0 +#else +# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) +#endif + +#ifdef CONFIG_CPU_UNRET_ENTRY +# define DISABLE_UNRET 0 +#else +# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) +#endif + /* Force disable because it's broken beyond repair */ #define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) @@ -79,7 +98,7 @@ #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX) #define DISABLED_MASK10 0 -#define DISABLED_MASK11 0 +#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) #define DISABLED_MASK12 0 #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 From 320fc994f0c88d81dc684e346a7e115fa8364137 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 8 Jul 2022 16:21:28 -0500 Subject: [PATCH 0749/1056] x86/bugs: Remove apostrophe typo commit bcf163150cd37348a0cb59e95c916a83a9344b0e upstream. Remove a superfluous ' in the mitigation string. Fixes: e8ec1b6e08a2 ("x86/bugs: Enable STIBP for JMP2RET") Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0e6b6c9527022b..650333fce795f9 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1174,7 +1174,7 @@ spectre_v2_user_select_mitigation(void) if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { if (mode != SPECTRE_V2_USER_STRICT && mode != SPECTRE_V2_USER_STRICT_PREFERRED) - pr_info("Selecting STIBP always-on mode to complement retbleed mitigation'\n"); + pr_info("Selecting STIBP always-on mode to complement retbleed mitigation\n"); mode = SPECTRE_V2_USER_STRICT_PREFERRED; } From 06741ef8e7cced9729e8061778bc97df58cd4649 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Jul 2022 12:20:19 +0200 Subject: [PATCH 0750/1056] um: Add missing apply_returns() commit 564d998106397394b6aad260f219b882b3347e62 upstream. Implement apply_returns() stub for UM, just like all the other patching routines. Fixes: 15e67227c49a ("x86: Undo return-thunk damage") Reported-by: Randy Dunlap Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/Ys%2Ft45l%2FgarIrD0u@worktop.programming.kicks-ass.net Signed-off-by: Greg Kroah-Hartman --- arch/um/kernel/um_arch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 54447690de1159..5aa5e7533b7574 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -425,6 +425,10 @@ void apply_retpolines(s32 *start, s32 *end) { } +void apply_returns(s32 *start, s32 *end) +{ +} + void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { } From b4296a7b863f201f9be08fee6f8891f4912de011 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 Nov 2021 17:50:25 +0100 Subject: [PATCH 0751/1056] x86: Use -mindirect-branch-cs-prefix for RETPOLINE builds commit 68cf4f2a72ef8786e6b7af6fd9a89f27ac0f520d upstream. In order to further enable commit: bbe2df3f6b6d ("x86/alternative: Try inline spectre_v2=retpoline,amd") add the new GCC flag -mindirect-branch-cs-prefix: https://gcc.gnu.org/g:2196a681d7810ad8b227bf983f38ba716620545e https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102952 https://bugs.llvm.org/show_bug.cgi?id=52323 to RETPOLINE=y builds. This should allow fully inlining retpoline,amd for GCC builds. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Kees Cook Acked-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20211119165630.276205624@infradead.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index e3864845d64af7..2c6eb13250e922 100644 --- a/Makefile +++ b/Makefile @@ -687,6 +687,7 @@ endif ifdef CONFIG_CC_IS_GCC RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) +RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix) RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) endif ifdef CONFIG_CC_IS_CLANG From a9e2d8e52e1c0d87c0fa4f9d2d38e210aabed515 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 23 Jul 2022 12:54:14 +0200 Subject: [PATCH 0752/1056] Linux 5.15.57 Link: https://lore.kernel.org/r/20220722091133.320803732@linuxfoundation.org Tested-by: Bagas Sanjaya Tested-by: Florian Fainelli Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Tested-by: Ron Economos Tested-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2c6eb13250e922..69bfff4d9c2d6b 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 56 +SUBLEVEL = 57 EXTRAVERSION = NAME = Trick or Treat From aff42cea285f94c1c34ffc710622251b1ceed945 Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Mon, 27 Jun 2022 16:23:50 +0200 Subject: [PATCH 0753/1056] pinctrl: stm32: fix optional IRQ support to gpios commit a1d4ef1adf8bbd302067534ead671a94759687ed upstream. To act as an interrupt controller, a gpio bank relies on the "interrupt-parent" of the pin controller. When this optional "interrupt-parent" misses, do not create any IRQ domain. This fixes a "NULL pointer in stm32_gpio_domain_alloc()" kernel crash when the interrupt-parent = property is not declared in the Device Tree. Fixes: 0eb9f683336d ("pinctrl: Add IRQ support to STM32 gpios") Signed-off-by: Fabien Dessenne Link: https://lore.kernel.org/r/20220627142350.742973-1-fabien.dessenne@foss.st.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/stm32/pinctrl-stm32.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 97a4fb5a932809..d3fa8cf0d72c40 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1299,15 +1299,17 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, bank->bank_ioport_nr = bank_ioport_nr; spin_lock_init(&bank->lock); - /* create irq hierarchical domain */ - bank->fwnode = of_node_to_fwnode(np); + if (pctl->domain) { + /* create irq hierarchical domain */ + bank->fwnode = of_node_to_fwnode(np); - bank->domain = irq_domain_create_hierarchy(pctl->domain, 0, - STM32_GPIO_IRQ_LINE, bank->fwnode, - &stm32_gpio_domain_ops, bank); + bank->domain = irq_domain_create_hierarchy(pctl->domain, 0, STM32_GPIO_IRQ_LINE, + bank->fwnode, &stm32_gpio_domain_ops, + bank); - if (!bank->domain) - return -ENODEV; + if (!bank->domain) + return -ENODEV; + } err = gpiochip_add_data(&bank->gpio_chip, bank); if (err) { @@ -1477,6 +1479,8 @@ int stm32_pctl_probe(struct platform_device *pdev) pctl->domain = stm32_pctrl_get_irq_domain(np); if (IS_ERR(pctl->domain)) return PTR_ERR(pctl->domain); + if (!pctl->domain) + dev_warn(dev, "pinctrl without interrupt support\n"); /* hwspinlock is optional */ hwlock_id = of_hwspin_lock_get_id(pdev->dev.of_node, 0); From 73846553bcde89d0eaffd74da8dc19d133c2c467 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Sun, 29 May 2022 16:22:00 +0100 Subject: [PATCH 0754/1056] riscv: add as-options for modules with assembly compontents commit c1f6eff304e4dfa4558b6a8c6b2d26a91db6c998 upstream. When trying to load modules built for RISC-V which include assembly files the kernel loader errors with "unexpected relocation type 'R_RISCV_ALIGN'" due to R_RISCV_ALIGN relocations being generated by the assembler. The R_RISCV_ALIGN relocations can be removed at the expense of code space by adding -mno-relax to gcc and as. In commit 7a8e7da42250138 ("RISC-V: Fixes to module loading") -mno-relax is added to the build variable KBUILD_CFLAGS_MODULE. See [1] for more info. The issue is that when kbuild builds a .S file, it invokes gcc with the -mno-relax flag, but this is not being passed through to the assembler. Adding -Wa,-mno-relax to KBUILD_AFLAGS_MODULE ensures that the assembler is invoked correctly. This may have now been fixed in gcc[2] and this addition should not stop newer gcc and as from working. [1] https://github.com/riscv/riscv-elf-psabi-doc/issues/183 [2] https://github.com/gcc-mirror/gcc/commit/3b0a7d624e64eeb81e4d5e8c62c46d86ef521857 Signed-off-by: Ben Dooks Reviewed-by: Bin Meng Link: https://lore.kernel.org/r/20220529152200.609809-1-ben.dooks@codethink.co.uk Fixes: ab1ef68e5401 ("RISC-V: Add sections of PLT and GOT for kernel module") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index e03f45f7711aab..583e1ff0c0bf78 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -75,6 +75,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y) endif KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) +KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax) # GCC versions that support the "-mstrict-align" option default to allowing # unaligned accesses. While unaligned accesses are explicitly allowed in the From 25ba5b824a721e2e6c8e93af60419aa211dcc8c8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 19 Jul 2022 15:26:26 +0300 Subject: [PATCH 0755/1056] mlxsw: spectrum_router: Fix IPv4 nexthop gateway indication commit e5ec6a2513383fe2ecc2ee3b5f51d97acbbcd4d8 upstream. mlxsw needs to distinguish nexthops with a gateway from connected nexthops in order to write the former to the adjacency table of the device. The check used to rely on the fact that nexthops with a gateway have a 'link' scope whereas connected nexthops have a 'host' scope. This is no longer correct after commit 747c14307214 ("ip: fix dflt addr selection for connected nexthop"). Fix that by instead checking the address family of the gateway IP. This is a more direct way and also consistent with the IPv6 counterpart in mlxsw_sp_rt6_is_gateway(). Cc: stable@vger.kernel.org Fixes: 747c14307214 ("ip: fix dflt addr selection for connected nexthop") Fixes: 597cfe4fc339 ("nexthop: Add support for IPv4 nexthops") Signed-off-by: Ido Schimmel Reviewed-by: Amit Cohen Reviewed-by: Nicolas Dichtel Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index d7d90cdce4f6dc..6ef4ca8599acd6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5196,7 +5196,7 @@ static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, { const struct fib_nh *nh = fib_info_nh(fi, 0); - return nh->fib_nh_scope == RT_SCOPE_LINK || + return nh->fib_nh_gw_family || mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL); } From 0e66932a9dc9ba47e60405b392e3782a332bc44e Mon Sep 17 00:00:00 2001 From: Eric Snowberg Date: Wed, 20 Jul 2022 12:40:27 -0400 Subject: [PATCH 0756/1056] lockdown: Fix kexec lockdown bypass with ima policy commit 543ce63b664e2c2f9533d089a4664b559c3e6b5b upstream. The lockdown LSM is primarily used in conjunction with UEFI Secure Boot. This LSM may also be used on machines without UEFI. It can also be enabled when UEFI Secure Boot is disabled. One of lockdown's features is to prevent kexec from loading untrusted kernels. Lockdown can be enabled through a bootparam or after the kernel has booted through securityfs. If IMA appraisal is used with the "ima_appraise=log" boot param, lockdown can be defeated with kexec on any machine when Secure Boot is disabled or unavailable. IMA prevents setting "ima_appraise=log" from the boot param when Secure Boot is enabled, but this does not cover cases where lockdown is used without Secure Boot. To defeat lockdown, boot without Secure Boot and add ima_appraise=log to the kernel command line; then: $ echo "integrity" > /sys/kernel/security/lockdown $ echo "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig" > \ /sys/kernel/security/ima/policy $ kexec -ls unsigned-kernel Add a call to verify ima appraisal is set to "enforce" whenever lockdown is enabled. This fixes CVE-2022-21505. Cc: stable@vger.kernel.org Fixes: 29d3c1c8dfe7 ("kexec: Allow kexec_file() with appropriate IMA policy when locked down") Signed-off-by: Eric Snowberg Acked-by: Mimi Zohar Reviewed-by: John Haxby Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima_policy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index fa5a93dbe5d26c..748b97a2582a4f 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -2034,6 +2034,10 @@ bool ima_appraise_signature(enum kernel_read_file_id id) if (id >= READING_MAX_ID) return false; + if (id == READING_KEXEC_IMAGE && !(ima_appraise & IMA_APPRAISE_ENFORCE) + && security_locked_down(LOCKDOWN_KEXEC)) + return false; + func = read_idmap[id] ?: FILE_CHECK; rcu_read_lock(); From 60e536b0a564eabaeae5fb3ac2f806df95134e78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 15 Jul 2022 09:57:22 +0200 Subject: [PATCH 0757/1056] drm/ttm: fix locking in vmap/vunmap TTM GEM helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit dbd0da2453c694f2f74651834d90fb280b57f151 upstream. I've stumbled over this while reviewing patches for DMA-buf and it looks like we completely messed the locking up here. In general most TTM function should only be called while holding the appropriate BO resv lock. Without this we could break the internal buffer object state here. Only compile tested! Signed-off-by: Christian König Fixes: 43676605f890 ("drm/ttm: Add vmap/vunmap to TTM and TTM GEM helpers") Cc: stable@vger.kernel.org Reviewed-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20220715111533.467012-1-christian.koenig@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_gem_ttm_helper.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_gem_ttm_helper.c b/drivers/gpu/drm/drm_gem_ttm_helper.c index ecf3d2a54a98c1..759c65bfd2845c 100644 --- a/drivers/gpu/drm/drm_gem_ttm_helper.c +++ b/drivers/gpu/drm/drm_gem_ttm_helper.c @@ -64,8 +64,13 @@ int drm_gem_ttm_vmap(struct drm_gem_object *gem, struct dma_buf_map *map) { struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); + int ret; + + dma_resv_lock(gem->resv, NULL); + ret = ttm_bo_vmap(bo, map); + dma_resv_unlock(gem->resv); - return ttm_bo_vmap(bo, map); + return ret; } EXPORT_SYMBOL(drm_gem_ttm_vmap); @@ -82,7 +87,9 @@ void drm_gem_ttm_vunmap(struct drm_gem_object *gem, { struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); + dma_resv_lock(gem->resv, NULL); ttm_bo_vunmap(bo, map); + dma_resv_unlock(gem->resv); } EXPORT_SYMBOL(drm_gem_ttm_vunmap); From 38b7bbe0ef1daa4f80d9a1685e25338a062867dc Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Wed, 27 Apr 2022 09:26:48 +0200 Subject: [PATCH 0758/1056] bus: mhi: host: pci_generic: add Telit FN980 v1 hardware revision commit a96ef8b504efb2ad445dfb6d54f9488c3ddf23d2 upstream. Add Telit FN980 v1 hardware revision: 01:00.0 Unassigned class [ff00]: Qualcomm Device [17cb:0306] Subsystem: Device [1c5d:2000] Signed-off-by: Daniele Palmas Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20220427072648.17635-1-dnlplm@gmail.com [mani: Added "host" to the subject] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Fabio Porcedda Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/pci_generic.c | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index 0982642a7907ea..1621e4ac94b226 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -406,7 +406,45 @@ static const struct mhi_pci_dev_info mhi_mv31_info = { .mru_default = 32768, }; +static const struct mhi_channel_config mhi_telit_fn980_hw_v1_channels[] = { + MHI_CHANNEL_CONFIG_UL(14, "QMI", 32, 0), + MHI_CHANNEL_CONFIG_DL(15, "QMI", 32, 0), + MHI_CHANNEL_CONFIG_UL(20, "IPCR", 16, 0), + MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(21, "IPCR", 16, 0), + MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0", 128, 1), + MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0", 128, 2), +}; + +static struct mhi_event_config mhi_telit_fn980_hw_v1_events[] = { + MHI_EVENT_CONFIG_CTRL(0, 128), + MHI_EVENT_CONFIG_HW_DATA(1, 1024, 100), + MHI_EVENT_CONFIG_HW_DATA(2, 2048, 101) +}; + +static struct mhi_controller_config modem_telit_fn980_hw_v1_config = { + .max_channels = 128, + .timeout_ms = 20000, + .num_channels = ARRAY_SIZE(mhi_telit_fn980_hw_v1_channels), + .ch_cfg = mhi_telit_fn980_hw_v1_channels, + .num_events = ARRAY_SIZE(mhi_telit_fn980_hw_v1_events), + .event_cfg = mhi_telit_fn980_hw_v1_events, +}; + +static const struct mhi_pci_dev_info mhi_telit_fn980_hw_v1_info = { + .name = "telit-fn980-hwv1", + .fw = "qcom/sdx55m/sbl1.mbn", + .edl = "qcom/sdx55m/edl.mbn", + .config = &modem_telit_fn980_hw_v1_config, + .bar_num = MHI_PCI_DEFAULT_BAR_NUM, + .dma_data_width = 32, + .mru_default = 32768, + .sideband_wake = false, +}; + static const struct pci_device_id mhi_pci_id_table[] = { + /* Telit FN980 hardware revision v1 */ + { PCI_DEVICE_SUB(PCI_VENDOR_ID_QCOM, 0x0306, 0x1C5D, 0x2000), + .driver_data = (kernel_ulong_t) &mhi_telit_fn980_hw_v1_info }, { PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0306), .driver_data = (kernel_ulong_t) &mhi_qcom_sdx55_info }, { PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0304), From d7c4f9b8ed62d5e3b1e1e7c520b16567084c8800 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Mon, 2 May 2022 13:20:36 +0200 Subject: [PATCH 0759/1056] bus: mhi: host: pci_generic: add Telit FN990 commit 77fc41204734042861210b9d05338c9b8360affb upstream. Add Telit FN990: 01:00.0 Unassigned class [ff00]: Qualcomm Device 0308 Subsystem: Device 1c5d:2010 Signed-off-by: Daniele Palmas Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20220502112036.443618-1-dnlplm@gmail.com [mani: Added "host" to the subject] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Fabio Porcedda Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/pci_generic.c | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index 1621e4ac94b226..b780990faf8065 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -441,6 +441,44 @@ static const struct mhi_pci_dev_info mhi_telit_fn980_hw_v1_info = { .sideband_wake = false, }; +static const struct mhi_channel_config mhi_telit_fn990_channels[] = { + MHI_CHANNEL_CONFIG_UL_SBL(2, "SAHARA", 32, 0), + MHI_CHANNEL_CONFIG_DL_SBL(3, "SAHARA", 32, 0), + MHI_CHANNEL_CONFIG_UL(4, "DIAG", 64, 1), + MHI_CHANNEL_CONFIG_DL(5, "DIAG", 64, 1), + MHI_CHANNEL_CONFIG_UL(12, "MBIM", 32, 0), + MHI_CHANNEL_CONFIG_DL(13, "MBIM", 32, 0), + MHI_CHANNEL_CONFIG_UL(32, "DUN", 32, 0), + MHI_CHANNEL_CONFIG_DL(33, "DUN", 32, 0), + MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0_MBIM", 128, 2), + MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0_MBIM", 128, 3), +}; + +static struct mhi_event_config mhi_telit_fn990_events[] = { + MHI_EVENT_CONFIG_CTRL(0, 128), + MHI_EVENT_CONFIG_DATA(1, 128), + MHI_EVENT_CONFIG_HW_DATA(2, 1024, 100), + MHI_EVENT_CONFIG_HW_DATA(3, 2048, 101) +}; + +static const struct mhi_controller_config modem_telit_fn990_config = { + .max_channels = 128, + .timeout_ms = 20000, + .num_channels = ARRAY_SIZE(mhi_telit_fn990_channels), + .ch_cfg = mhi_telit_fn990_channels, + .num_events = ARRAY_SIZE(mhi_telit_fn990_events), + .event_cfg = mhi_telit_fn990_events, +}; + +static const struct mhi_pci_dev_info mhi_telit_fn990_info = { + .name = "telit-fn990", + .config = &modem_telit_fn990_config, + .bar_num = MHI_PCI_DEFAULT_BAR_NUM, + .dma_data_width = 32, + .sideband_wake = false, + .mru_default = 32768, +}; + static const struct pci_device_id mhi_pci_id_table[] = { /* Telit FN980 hardware revision v1 */ { PCI_DEVICE_SUB(PCI_VENDOR_ID_QCOM, 0x0306, 0x1C5D, 0x2000), @@ -449,6 +487,9 @@ static const struct pci_device_id mhi_pci_id_table[] = { .driver_data = (kernel_ulong_t) &mhi_qcom_sdx55_info }, { PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0304), .driver_data = (kernel_ulong_t) &mhi_qcom_sdx24_info }, + /* Telit FN990 */ + { PCI_DEVICE_SUB(PCI_VENDOR_ID_QCOM, 0x0308, 0x1c5d, 0x2010), + .driver_data = (kernel_ulong_t) &mhi_telit_fn990_info }, { PCI_DEVICE(0x1eac, 0x1001), /* EM120R-GL (sdx24) */ .driver_data = (kernel_ulong_t) &mhi_quectel_em1xx_info }, { PCI_DEVICE(0x1eac, 0x1002), /* EM160R-GL (sdx24) */ From cfd3a9be0ac423be41afcc7a07d708056bf097a8 Mon Sep 17 00:00:00 2001 From: Oleksandr Tymoshenko Date: Fri, 15 Jul 2022 23:15:41 +0000 Subject: [PATCH 0760/1056] Revert "selftest/vm: verify remap destination address in mremap_test" This reverts commit 0b4e16093e081a3ab08b0d6cedf79b249f41b248. The upstream commit 18d609daa546 ("selftest/vm: verify remap destination address in mremap_test") was backported as commit 2688d967ec65 ("selftest/vm: verify remap destination address in mremap_test"). Repeated backport introduced the duplicate of function is_remap_region_valid to the file breakign the vm selftest build. Fixes: 0b4e16093e08 ("selftest/vm: verify remap destination address in mremap_test") Signed-off-by: Oleksandr Tymoshenko Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/vm/mremap_test.c | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c index 8f4dbbd60c09ca..efcbf537b3d575 100644 --- a/tools/testing/selftests/vm/mremap_test.c +++ b/tools/testing/selftests/vm/mremap_test.c @@ -66,30 +66,6 @@ enum { .expect_failure = should_fail \ } -/* - * Returns false if the requested remap region overlaps with an - * existing mapping (e.g text, stack) else returns true. - */ -static bool is_remap_region_valid(void *addr, unsigned long long size) -{ - void *remap_addr = NULL; - bool ret = true; - - /* Use MAP_FIXED_NOREPLACE flag to ensure region is not mapped */ - remap_addr = mmap(addr, size, PROT_READ | PROT_WRITE, - MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED, - -1, 0); - - if (remap_addr == MAP_FAILED) { - if (errno == EEXIST) - ret = false; - } else { - munmap(remap_addr, size); - } - - return ret; -} - /* Returns mmap_min_addr sysctl tunable from procfs */ static unsigned long long get_mmap_min_addr(void) { From 0791309aa0eb2d39c116ff95bab120e53b0c8f94 Mon Sep 17 00:00:00 2001 From: Oleksandr Tymoshenko Date: Fri, 15 Jul 2022 23:15:42 +0000 Subject: [PATCH 0761/1056] Revert "selftest/vm: verify mmap addr in mremap_test" This reverts commit e8b9989597daac896b3400b7005f24bf15233d9a. The upstream commit 9c85a9bae267 ("selftest/vm: verify mmap addr in mremap_test") was backported as commit a17404fcbfd0 ("selftest/vm: verify mmap addr in mremap_test"). Repeated backport introduced the duplicate of function get_mmap_min_addr to the file breakign the vm selftest build. Fixes: e8b9989597da ("selftest/vm: verify mmap addr in mremap_test") Signed-off-by: Oleksandr Tymoshenko Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/vm/mremap_test.c | 29 ------------------------ 1 file changed, 29 deletions(-) diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c index efcbf537b3d575..e3ce33a9954ea9 100644 --- a/tools/testing/selftests/vm/mremap_test.c +++ b/tools/testing/selftests/vm/mremap_test.c @@ -66,35 +66,6 @@ enum { .expect_failure = should_fail \ } -/* Returns mmap_min_addr sysctl tunable from procfs */ -static unsigned long long get_mmap_min_addr(void) -{ - FILE *fp; - int n_matched; - static unsigned long long addr; - - if (addr) - return addr; - - fp = fopen("/proc/sys/vm/mmap_min_addr", "r"); - if (fp == NULL) { - ksft_print_msg("Failed to open /proc/sys/vm/mmap_min_addr: %s\n", - strerror(errno)); - exit(KSFT_SKIP); - } - - n_matched = fscanf(fp, "%llu", &addr); - if (n_matched != 1) { - ksft_print_msg("Failed to read /proc/sys/vm/mmap_min_addr: %s\n", - strerror(errno)); - fclose(fp); - exit(KSFT_SKIP); - } - - fclose(fp); - return addr; -} - /* * Returns false if the requested remap region overlaps with an * existing mapping (e.g text, stack) else returns true. From 5c676a214d6359ef84a673ca791716f8c76555cf Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 18 Jul 2022 15:20:29 +0000 Subject: [PATCH 0762/1056] PCI: hv: Fix multi-MSI to allow more than one MSI vector commit 08e61e861a0e47e5e1a3fb78406afd6b0cea6b6d upstream. If the allocation of multiple MSI vectors for multi-MSI fails in the core PCI framework, the framework will retry the allocation as a single MSI vector, assuming that meets the min_vecs specified by the requesting driver. Hyper-V advertises that multi-MSI is supported, but reuses the VECTOR domain to implement that for x86. The VECTOR domain does not support multi-MSI, so the alloc will always fail and fallback to a single MSI allocation. In short, Hyper-V advertises a capability it does not implement. Hyper-V can support multi-MSI because it coordinates with the hypervisor to map the MSIs in the IOMMU's interrupt remapper, which is something the VECTOR domain does not have. Therefore the fix is simple - copy what the x86 IOMMU drivers (AMD/Intel-IR) do by removing X86_IRQ_ALLOC_CONTIGUOUS_VECTORS after calling the VECTOR domain's pci_msi_prepare(). 5.15 backport - adds the hv_msi_prepare wrapper function Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs") Signed-off-by: Jeffrey Hugo Reviewed-by: Dexuan Cui Link: https://lore.kernel.org/r/1649856981-14649-1-git-send-email-quic_jhugo@quicinc.com Signed-off-by: Wei Liu Signed-off-by: Carl Vanderlip Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pci-hyperv.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 5b156c563e3a4d..44731a0eb5329e 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1204,6 +1204,21 @@ static void hv_irq_mask(struct irq_data *data) pci_msi_mask_irq(data); } +static int hv_msi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *info) +{ + int ret = pci_msi_prepare(domain, dev, nvec, info); + + /* + * By using the interrupt remapper in the hypervisor IOMMU, contiguous + * CPU vectors is not needed for multi-MSI + */ + if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI) + info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; + + return ret; +} + /** * hv_irq_unmask() - "Unmask" the IRQ by setting its current * affinity. @@ -1601,7 +1616,7 @@ static struct irq_chip hv_msi_irq_chip = { }; static struct msi_domain_ops hv_msi_ops = { - .msi_prepare = pci_msi_prepare, + .msi_prepare = hv_msi_prepare, .msi_free = hv_msi_free, }; From 35d24b115a407c0a1a73900d025da77be2763ed3 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 18 Jul 2022 15:20:30 +0000 Subject: [PATCH 0763/1056] PCI: hv: Fix hv_arch_irq_unmask() for multi-MSI commit 455880dfe292a2bdd3b4ad6a107299fce610e64b upstream. In the multi-MSI case, hv_arch_irq_unmask() will only operate on the first MSI of the N allocated. This is because only the first msi_desc is cached and it is shared by all the MSIs of the multi-MSI block. This means that hv_arch_irq_unmask() gets the correct address, but the wrong data (always 0). This can break MSIs. Lets assume MSI0 is vector 34 on CPU0, and MSI1 is vector 33 on CPU0. hv_arch_irq_unmask() is called on MSI0. It uses a hypercall to configure the MSI address and data (0) to vector 34 of CPU0. This is correct. Then hv_arch_irq_unmask is called on MSI1. It uses another hypercall to configure the MSI address and data (0) to vector 33 of CPU0. This is wrong, and results in both MSI0 and MSI1 being routed to vector 33. Linux will observe extra instances of MSI1 and no instances of MSI0 despite the endpoint device behaving correctly. For the multi-MSI case, we need unique address and data info for each MSI, but the cached msi_desc does not provide that. However, that information can be gotten from the int_desc cached in the chip_data by compose_msi_msg(). Fix the multi-MSI case to use that cached information instead. Since hv_set_msi_entry_from_desc() is no longer applicable, remove it. 5.15 backport - no changes to code, but merge conflict due to refactor. Signed-off-by: Jeffrey Hugo Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1651068453-29588-1-git-send-email-quic_jhugo@quicinc.com Signed-off-by: Wei Liu Signed-off-by: Carl Vanderlip Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mshyperv.h | 7 ------- drivers/pci/controller/pci-hyperv.c | 5 ++++- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index adccbc209169aa..c2b9ab94408e69 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -176,13 +176,6 @@ bool hv_vcpu_is_preempted(int vcpu); static inline void hv_apic_init(void) {} #endif -static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry, - struct msi_desc *msi_desc) -{ - msi_entry->address.as_uint32 = msi_desc->msg.address_lo; - msi_entry->data.as_uint32 = msi_desc->msg.data; -} - struct irq_domain *hv_create_pci_msi_domain(void); int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 44731a0eb5329e..560a9aa297c3a7 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1234,6 +1234,7 @@ static void hv_irq_unmask(struct irq_data *data) struct msi_desc *msi_desc = irq_data_get_msi_desc(data); struct irq_cfg *cfg = irqd_cfg(data); struct hv_retarget_device_interrupt *params; + struct tran_int_desc *int_desc; struct hv_pcibus_device *hbus; struct cpumask *dest; cpumask_var_t tmp; @@ -1248,6 +1249,7 @@ static void hv_irq_unmask(struct irq_data *data) pdev = msi_desc_to_pci_dev(msi_desc); pbus = pdev->bus; hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata); + int_desc = data->chip_data; spin_lock_irqsave(&hbus->retarget_msi_interrupt_lock, flags); @@ -1255,7 +1257,8 @@ static void hv_irq_unmask(struct irq_data *data) memset(params, 0, sizeof(*params)); params->partition_id = HV_PARTITION_ID_SELF; params->int_entry.source = HV_INTERRUPT_SOURCE_MSI; - hv_set_msi_entry_from_desc(¶ms->int_entry.msi_entry, msi_desc); + params->int_entry.msi_entry.address.as_uint32 = int_desc->address & 0xffffffff; + params->int_entry.msi_entry.data.as_uint32 = int_desc->data; params->device_id = (hbus->hdev->dev_instance.b[5] << 24) | (hbus->hdev->dev_instance.b[4] << 16) | (hbus->hdev->dev_instance.b[7] << 8) | From 2b77425d0ae4cf6338f7841e9e15fa1e61b8c9bf Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 18 Jul 2022 15:20:31 +0000 Subject: [PATCH 0764/1056] PCI: hv: Reuse existing IRTE allocation in compose_msi_msg() commit b4b77778ecc5bfbd4e77de1b2fd5c1dd3c655f1f upstream. Currently if compose_msi_msg() is called multiple times, it will free any previous IRTE allocation, and generate a new allocation. While nothing prevents this from occurring, it is extraneous when Linux could just reuse the existing allocation and avoid a bunch of overhead. However, when future IRTE allocations operate on blocks of MSIs instead of a single line, freeing the allocation will impact all of the lines. This could cause an issue where an allocation of N MSIs occurs, then some of the lines are retargeted, and finally the allocation is freed/reallocated. The freeing of the allocation removes all of the configuration for the entire block, which requires all the lines to be retargeted, which might not happen since some lines might already be unmasked/active. Signed-off-by: Jeffrey Hugo Reviewed-by: Dexuan Cui Tested-by: Dexuan Cui Tested-by: Michael Kelley Link: https://lore.kernel.org/r/1652282582-21595-1-git-send-email-quic_jhugo@quicinc.com Signed-off-by: Wei Liu Signed-off-by: Carl Vanderlip Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pci-hyperv.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 560a9aa297c3a7..0200a291413a34 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1458,6 +1458,15 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) u32 size; int ret; + /* Reuse the previous allocation */ + if (data->chip_data) { + int_desc = data->chip_data; + msg->address_hi = int_desc->address >> 32; + msg->address_lo = int_desc->address & 0xffffffff; + msg->data = int_desc->data; + return; + } + pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data)); dest = irq_data_get_effective_affinity_mask(data); pbus = pdev->bus; @@ -1467,13 +1476,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) if (!hpdev) goto return_null_message; - /* Free any previous message that might have already been composed. */ - if (data->chip_data) { - int_desc = data->chip_data; - data->chip_data = NULL; - hv_int_desc_free(hpdev, int_desc); - } - int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC); if (!int_desc) goto drop_reference; From 92dcb50f7f09971cad1b1cc67ec31756c5b78dec Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 18 Jul 2022 15:20:32 +0000 Subject: [PATCH 0765/1056] PCI: hv: Fix interrupt mapping for multi-MSI commit a2bad844a67b1c7740bda63e87453baf63c3a7f7 upstream. According to Dexuan, the hypervisor folks beleive that multi-msi allocations are not correct. compose_msi_msg() will allocate multi-msi one by one. However, multi-msi is a block of related MSIs, with alignment requirements. In order for the hypervisor to allocate properly aligned and consecutive entries in the IOMMU Interrupt Remapping Table, there should be a single mapping request that requests all of the multi-msi vectors in one shot. Dexuan suggests detecting the multi-msi case and composing a single request related to the first MSI. Then for the other MSIs in the same block, use the cached information. This appears to be viable, so do it. 5.15 backport - add hv_msi_get_int_vector helper function. Fixed merge conflict due to delivery_mode name change (APIC_DELIVERY_MODE_FIXED is the value given to DELIVERY_MODE on x86). Removed unused variable in hv_compose_msi_msg. Fixed reference to msi_desc->pci to point to the same is_msix variable. Suggested-by: Dexuan Cui Signed-off-by: Jeffrey Hugo Reviewed-by: Dexuan Cui Tested-by: Michael Kelley Link: https://lore.kernel.org/r/1652282599-21643-1-git-send-email-quic_jhugo@quicinc.com Signed-off-by: Wei Liu Signed-off-by: Carl Vanderlip Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pci-hyperv.c | 68 ++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 0200a291413a34..9b54715a4b63b8 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1142,6 +1142,10 @@ static void hv_int_desc_free(struct hv_pci_dev *hpdev, u8 buffer[sizeof(struct pci_delete_interrupt)]; } ctxt; + if (!int_desc->vector_count) { + kfree(int_desc); + return; + } memset(&ctxt, 0, sizeof(ctxt)); int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message; int_pkt->message_type.type = @@ -1204,6 +1208,13 @@ static void hv_irq_mask(struct irq_data *data) pci_msi_mask_irq(data); } +static unsigned int hv_msi_get_int_vector(struct irq_data *data) +{ + struct irq_cfg *cfg = irqd_cfg(data); + + return cfg->vector; +} + static int hv_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *info) { @@ -1359,12 +1370,12 @@ static void hv_pci_compose_compl(void *context, struct pci_response *resp, static u32 hv_compose_msi_req_v1( struct pci_create_interrupt *int_pkt, struct cpumask *affinity, - u32 slot, u8 vector) + u32 slot, u8 vector, u8 vector_count) { int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; - int_pkt->int_desc.vector_count = 1; + int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED; /* @@ -1387,14 +1398,14 @@ static int hv_compose_msi_req_get_cpu(struct cpumask *affinity) static u32 hv_compose_msi_req_v2( struct pci_create_interrupt2 *int_pkt, struct cpumask *affinity, - u32 slot, u8 vector) + u32 slot, u8 vector, u8 vector_count) { int cpu; int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; - int_pkt->int_desc.vector_count = 1; + int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED; cpu = hv_compose_msi_req_get_cpu(affinity); int_pkt->int_desc.processor_array[0] = @@ -1406,7 +1417,7 @@ static u32 hv_compose_msi_req_v2( static u32 hv_compose_msi_req_v3( struct pci_create_interrupt3 *int_pkt, struct cpumask *affinity, - u32 slot, u32 vector) + u32 slot, u32 vector, u8 vector_count) { int cpu; @@ -1414,7 +1425,7 @@ static u32 hv_compose_msi_req_v3( int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.reserved = 0; - int_pkt->int_desc.vector_count = 1; + int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED; cpu = hv_compose_msi_req_get_cpu(affinity); int_pkt->int_desc.processor_array[0] = @@ -1437,7 +1448,6 @@ static u32 hv_compose_msi_req_v3( */ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { - struct irq_cfg *cfg = irqd_cfg(data); struct hv_pcibus_device *hbus; struct vmbus_channel *channel; struct hv_pci_dev *hpdev; @@ -1446,6 +1456,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) struct cpumask *dest; struct compose_comp_ctxt comp; struct tran_int_desc *int_desc; + struct msi_desc *msi_desc; + u8 vector, vector_count; struct { struct pci_packet pci_pkt; union { @@ -1467,7 +1479,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return; } - pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data)); + msi_desc = irq_data_get_msi_desc(data); + pdev = msi_desc_to_pci_dev(msi_desc); dest = irq_data_get_effective_affinity_mask(data); pbus = pdev->bus; hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata); @@ -1480,6 +1493,36 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) if (!int_desc) goto drop_reference; + if (!msi_desc->msi_attrib.is_msix && msi_desc->nvec_used > 1) { + /* + * If this is not the first MSI of Multi MSI, we already have + * a mapping. Can exit early. + */ + if (msi_desc->irq != data->irq) { + data->chip_data = int_desc; + int_desc->address = msi_desc->msg.address_lo | + (u64)msi_desc->msg.address_hi << 32; + int_desc->data = msi_desc->msg.data + + (data->irq - msi_desc->irq); + msg->address_hi = msi_desc->msg.address_hi; + msg->address_lo = msi_desc->msg.address_lo; + msg->data = int_desc->data; + put_pcichild(hpdev); + return; + } + /* + * The vector we select here is a dummy value. The correct + * value gets sent to the hypervisor in unmask(). This needs + * to be aligned with the count, and also not zero. Multi-msi + * is powers of 2 up to 32, so 32 will always work here. + */ + vector = 32; + vector_count = msi_desc->nvec_used; + } else { + vector = hv_msi_get_int_vector(data); + vector_count = 1; + } + memset(&ctxt, 0, sizeof(ctxt)); init_completion(&comp.comp_pkt.host_event); ctxt.pci_pkt.completion_func = hv_pci_compose_compl; @@ -1490,7 +1533,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1, dest, hpdev->desc.win_slot.slot, - cfg->vector); + vector, + vector_count); break; case PCI_PROTOCOL_VERSION_1_2: @@ -1498,14 +1542,16 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2, dest, hpdev->desc.win_slot.slot, - cfg->vector); + vector, + vector_count); break; case PCI_PROTOCOL_VERSION_1_4: size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3, dest, hpdev->desc.win_slot.slot, - cfg->vector); + vector, + vector_count); break; default: From be56f007c41400f5115e390cab69ff4c464c2370 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 28 Jun 2022 12:09:22 +0200 Subject: [PATCH 0766/1056] serial: mvebu-uart: correctly report configured baudrate value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4f532c1e25319e42996ec18a1f473fd50c8e575d upstream. Functions tty_termios_encode_baud_rate() and uart_update_timeout() should be called with the baudrate value which was set to hardware. Linux then report exact values via ioctl(TCGETS2) to userspace. Change mvebu_uart_baud_rate_set() function to return baudrate value which was set to hardware and propagate this value to above mentioned functions. With this change userspace would see precise value in termios c_ospeed field. Fixes: 68a0db1d7da2 ("serial: mvebu-uart: add function to change baudrate") Cc: stable Reviewed-by: Ilpo Järvinen Signed-off-by: Pali Rohár Link: https://lore.kernel.org/r/20220628100922.10717-1-pali@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mvebu-uart.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index ab226da75f7bad..8eb64898b159d9 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -442,13 +442,13 @@ static void mvebu_uart_shutdown(struct uart_port *port) } } -static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) +static unsigned int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) { unsigned int d_divisor, m_divisor; u32 brdv, osamp; if (!port->uartclk) - return -EOPNOTSUPP; + return 0; /* * The baudrate is derived from the UART clock thanks to two divisors: @@ -472,7 +472,7 @@ static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) osamp &= ~OSAMP_DIVISORS_MASK; writel(osamp, port->membase + UART_OSAMP); - return 0; + return DIV_ROUND_CLOSEST(port->uartclk, d_divisor * m_divisor); } static void mvebu_uart_set_termios(struct uart_port *port, @@ -509,15 +509,11 @@ static void mvebu_uart_set_termios(struct uart_port *port, max_baud = 230400; baud = uart_get_baud_rate(port, termios, old, min_baud, max_baud); - if (mvebu_uart_baud_rate_set(port, baud)) { - /* No clock available, baudrate cannot be changed */ - if (old) - baud = uart_get_baud_rate(port, old, NULL, - min_baud, max_baud); - } else { - tty_termios_encode_baud_rate(termios, baud, baud); - uart_update_timeout(port, termios->c_cflag, baud); - } + baud = mvebu_uart_baud_rate_set(port, baud); + + /* In case baudrate cannot be changed, report previous old value */ + if (baud == 0 && old) + baud = tty_termios_baud_rate(old); /* Only the following flag changes are supported */ if (old) { @@ -528,6 +524,11 @@ static void mvebu_uart_set_termios(struct uart_port *port, termios->c_cflag |= CS8; } + if (baud != 0) { + tty_termios_encode_baud_rate(termios, baud, baud); + uart_update_timeout(port, termios->c_cflag, baud); + } + spin_unlock_irqrestore(&port->lock, flags); } From 0cac1c84e1ccdd71924c644f83e24f5ef47d6f7c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 19 Jul 2022 18:08:12 +0200 Subject: [PATCH 0767/1056] batman-adv: Use netif_rx_any_context() any. This reverts the stable commit e65d78b12fbc0 ("batman-adv: Use netif_rx().") The commit message says: | Since commit | baebdf48c3600 ("net: dev: Makes sure netif_rx() can be invoked in any context.") | | the function netif_rx() can be used in preemptible/thread context as | well as in interrupt context. This commit (baebdf48c3600) has not been backported to the 5.15 stable series and therefore, the commit which builds upon it, must not be backported either. Revert the backport and use netif_rx_any_context() again. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bridge_loop_avoidance.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 11f6ef657d822d..17687848daec56 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -443,7 +443,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES, skb->len + ETH_HLEN); - netif_rx(skb); + netif_rx_any_context(skb); out: batadv_hardif_put(primary_if); } From 4666a6eb390c683e346d3ccb7bb8e41ef994afc5 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 15 Jul 2022 12:59:22 -0700 Subject: [PATCH 0768/1056] Revert "mt76: mt7921: Fix the error handling path of mt7921_pci_probe()" This reverts commit 663457f421d41e9d2fcb1e84baf43d1433f80c08 that is the commit 44c4237cf3436bda2b185ff728123651ad133f69 upstream. Because there was mistake in '649178c0493e ("mt76: mt7921e: fix possible probe failure after reboot")' that caused WiFi reset cannot work well as the reported issue "PROBLEM: [Stable v5.15.42+] [mt7921] Wake after suspend locks up system when mt7921-driver is used on a Lenovo ThinkPad E15 G3" described in http://lists.infradead.org/pipermail/linux-mediatek/2022-June/042668.html So we need to revert the patch first to avoid the conflict of reverting '649178c0493e ("mt76: mt7921e: fix possible probe failure after reboot")' and will be applied back later after fixing. Signed-off-by: Sean Wang Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index 3d35838ef30694..7d9b23a0023881 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -254,10 +254,8 @@ static int mt7921_pci_probe(struct pci_dev *pdev, dev->bus_ops = dev->mt76.bus; bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops), GFP_KERNEL); - if (!bus_ops) { - ret = -ENOMEM; - goto err_free_dev; - } + if (!bus_ops) + return -ENOMEM; bus_ops->rr = mt7921_rr; bus_ops->wr = mt7921_wr; @@ -266,7 +264,7 @@ static int mt7921_pci_probe(struct pci_dev *pdev, ret = __mt7921_mcu_drv_pmctrl(dev); if (ret) - goto err_free_dev; + return ret; mdev->rev = (mt7921_l1_rr(dev, MT_HW_CHIPID) << 16) | (mt7921_l1_rr(dev, MT_HW_REV) & 0xff); From 191c16f921ad835d2901a630300daebf6379447e Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 15 Jul 2022 12:59:23 -0700 Subject: [PATCH 0769/1056] Revert "mt76: mt7921e: fix possible probe failure after reboot" This reverts commit 649178c0493e4080b2b226b0ef9fa2d834b1b412 that is the commit 602cc0c9618a819ab00ea3c9400742a0ca318380 upstream. Because there was mistake in '649178c0493e ("mt76: mt7921e: fix possible probe failure after reboot")' that caused WiFi reset cannot work well as the reported issue "PROBLEM: [Stable v5.15.42+] [mt7921] Wake after suspend locks up system when mt7921-driver is used on a Lenovo ThinkPad E15 G3" described in http://lists.infradead.org/pipermail/linux-mediatek/2022-June/042668.html So, we need to revert it before fixing and landing it again on the stable tree from upstream. Signed-off-by: Sean Wang Signed-off-by: Greg Kroah-Hartman --- .../net/wireless/mediatek/mt76/mt7921/dma.c | 115 +++++++++++++++++ .../net/wireless/mediatek/mt76/mt7921/mcu.c | 20 +-- .../net/wireless/mediatek/mt76/mt7921/pci.c | 121 ------------------ 3 files changed, 125 insertions(+), 131 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c index 93d0cc1827d261..7d7d43a5422f80 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c @@ -118,6 +118,109 @@ static void mt7921_dma_prefetch(struct mt7921_dev *dev) mt76_wr(dev, MT_WFDMA0_TX_RING17_EXT_CTRL, PREFETCH(0x380, 0x4)); } +static u32 __mt7921_reg_addr(struct mt7921_dev *dev, u32 addr) +{ + static const struct { + u32 phys; + u32 mapped; + u32 size; + } fixed_map[] = { + { 0x00400000, 0x80000, 0x10000}, /* WF_MCU_SYSRAM */ + { 0x00410000, 0x90000, 0x10000}, /* WF_MCU_SYSRAM (configure register) */ + { 0x40000000, 0x70000, 0x10000}, /* WF_UMAC_SYSRAM */ + { 0x54000000, 0x02000, 0x1000 }, /* WFDMA PCIE0 MCU DMA0 */ + { 0x55000000, 0x03000, 0x1000 }, /* WFDMA PCIE0 MCU DMA1 */ + { 0x58000000, 0x06000, 0x1000 }, /* WFDMA PCIE1 MCU DMA0 (MEM_DMA) */ + { 0x59000000, 0x07000, 0x1000 }, /* WFDMA PCIE1 MCU DMA1 */ + { 0x7c000000, 0xf0000, 0x10000 }, /* CONN_INFRA */ + { 0x7c020000, 0xd0000, 0x10000 }, /* CONN_INFRA, WFDMA */ + { 0x7c060000, 0xe0000, 0x10000}, /* CONN_INFRA, conn_host_csr_top */ + { 0x80020000, 0xb0000, 0x10000 }, /* WF_TOP_MISC_OFF */ + { 0x81020000, 0xc0000, 0x10000 }, /* WF_TOP_MISC_ON */ + { 0x820c0000, 0x08000, 0x4000 }, /* WF_UMAC_TOP (PLE) */ + { 0x820c8000, 0x0c000, 0x2000 }, /* WF_UMAC_TOP (PSE) */ + { 0x820cc000, 0x0e000, 0x2000 }, /* WF_UMAC_TOP (PP) */ + { 0x820ce000, 0x21c00, 0x0200 }, /* WF_LMAC_TOP (WF_SEC) */ + { 0x820cf000, 0x22000, 0x1000 }, /* WF_LMAC_TOP (WF_PF) */ + { 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */ + { 0x820e0000, 0x20000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_CFG) */ + { 0x820e1000, 0x20400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_TRB) */ + { 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */ + { 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */ + { 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */ + { 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */ + { 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */ + { 0x820e9000, 0x23400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_WTBLOFF) */ + { 0x820ea000, 0x24000, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_ETBF) */ + { 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */ + { 0x820ec000, 0x24600, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_INT) */ + { 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */ + { 0x820f0000, 0xa0000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_CFG) */ + { 0x820f1000, 0xa0600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_TRB) */ + { 0x820f2000, 0xa0800, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_AGG) */ + { 0x820f3000, 0xa0c00, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_ARB) */ + { 0x820f4000, 0xa1000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_TMAC) */ + { 0x820f5000, 0xa1400, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_RMAC) */ + { 0x820f7000, 0xa1e00, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_DMA) */ + { 0x820f9000, 0xa3400, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_WTBLOFF) */ + { 0x820fa000, 0xa4000, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_ETBF) */ + { 0x820fb000, 0xa4200, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_LPON) */ + { 0x820fc000, 0xa4600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_INT) */ + { 0x820fd000, 0xa4800, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_MIB) */ + }; + int i; + + if (addr < 0x100000) + return addr; + + for (i = 0; i < ARRAY_SIZE(fixed_map); i++) { + u32 ofs; + + if (addr < fixed_map[i].phys) + continue; + + ofs = addr - fixed_map[i].phys; + if (ofs > fixed_map[i].size) + continue; + + return fixed_map[i].mapped + ofs; + } + + if ((addr >= 0x18000000 && addr < 0x18c00000) || + (addr >= 0x70000000 && addr < 0x78000000) || + (addr >= 0x7c000000 && addr < 0x7c400000)) + return mt7921_reg_map_l1(dev, addr); + + dev_err(dev->mt76.dev, "Access currently unsupported address %08x\n", + addr); + + return 0; +} + +static u32 mt7921_rr(struct mt76_dev *mdev, u32 offset) +{ + struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); + u32 addr = __mt7921_reg_addr(dev, offset); + + return dev->bus_ops->rr(mdev, addr); +} + +static void mt7921_wr(struct mt76_dev *mdev, u32 offset, u32 val) +{ + struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); + u32 addr = __mt7921_reg_addr(dev, offset); + + dev->bus_ops->wr(mdev, addr, val); +} + +static u32 mt7921_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val) +{ + struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); + u32 addr = __mt7921_reg_addr(dev, offset); + + return dev->bus_ops->rmw(mdev, addr, mask, val); +} + static int mt7921_dma_disable(struct mt7921_dev *dev, bool force) { if (force) { @@ -277,8 +380,20 @@ int mt7921_wpdma_reinit_cond(struct mt7921_dev *dev) int mt7921_dma_init(struct mt7921_dev *dev) { + struct mt76_bus_ops *bus_ops; int ret; + dev->bus_ops = dev->mt76.bus; + bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops), + GFP_KERNEL); + if (!bus_ops) + return -ENOMEM; + + bus_ops->rr = mt7921_rr; + bus_ops->wr = mt7921_wr; + bus_ops->rmw = mt7921_rmw; + dev->mt76.bus = bus_ops; + mt76_dma_attach(&dev->mt76); ret = mt7921_dma_disable(dev, true); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c index dabc0de2ec65dd..391bbb114750c4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c @@ -1308,6 +1308,8 @@ int mt7921_mcu_sta_update(struct mt7921_dev *dev, struct ieee80211_sta *sta, int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev) { + struct mt76_phy *mphy = &dev->mt76.phy; + struct mt76_connac_pm *pm = &dev->pm; int i, err = 0; for (i = 0; i < MT7921_DRV_OWN_RETRY_COUNT; i++) { @@ -1320,8 +1322,16 @@ int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev) if (i == MT7921_DRV_OWN_RETRY_COUNT) { dev_err(dev->mt76.dev, "driver own failed\n"); err = -EIO; + goto out; } + mt7921_wpdma_reinit_cond(dev); + clear_bit(MT76_STATE_PM, &mphy->state); + + pm->stats.last_wake_event = jiffies; + pm->stats.doze_time += pm->stats.last_wake_event - + pm->stats.last_doze_event; +out: return err; } @@ -1337,16 +1347,6 @@ int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev) goto out; err = __mt7921_mcu_drv_pmctrl(dev); - if (err < 0) - goto out; - - mt7921_wpdma_reinit_cond(dev); - clear_bit(MT76_STATE_PM, &mphy->state); - - pm->stats.last_wake_event = jiffies; - pm->stats.doze_time += pm->stats.last_wake_event - - pm->stats.last_doze_event; - out: mutex_unlock(&pm->mutex); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index 7d9b23a0023881..c3905bcab36047 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -88,110 +88,6 @@ static void mt7921_irq_tasklet(unsigned long data) napi_schedule(&dev->mt76.napi[MT_RXQ_MAIN]); } -static u32 __mt7921_reg_addr(struct mt7921_dev *dev, u32 addr) -{ - static const struct { - u32 phys; - u32 mapped; - u32 size; - } fixed_map[] = { - { 0x00400000, 0x80000, 0x10000}, /* WF_MCU_SYSRAM */ - { 0x00410000, 0x90000, 0x10000}, /* WF_MCU_SYSRAM (configure register) */ - { 0x40000000, 0x70000, 0x10000}, /* WF_UMAC_SYSRAM */ - { 0x54000000, 0x02000, 0x1000 }, /* WFDMA PCIE0 MCU DMA0 */ - { 0x55000000, 0x03000, 0x1000 }, /* WFDMA PCIE0 MCU DMA1 */ - { 0x58000000, 0x06000, 0x1000 }, /* WFDMA PCIE1 MCU DMA0 (MEM_DMA) */ - { 0x59000000, 0x07000, 0x1000 }, /* WFDMA PCIE1 MCU DMA1 */ - { 0x7c000000, 0xf0000, 0x10000 }, /* CONN_INFRA */ - { 0x7c020000, 0xd0000, 0x10000 }, /* CONN_INFRA, WFDMA */ - { 0x7c060000, 0xe0000, 0x10000}, /* CONN_INFRA, conn_host_csr_top */ - { 0x80020000, 0xb0000, 0x10000 }, /* WF_TOP_MISC_OFF */ - { 0x81020000, 0xc0000, 0x10000 }, /* WF_TOP_MISC_ON */ - { 0x820c0000, 0x08000, 0x4000 }, /* WF_UMAC_TOP (PLE) */ - { 0x820c8000, 0x0c000, 0x2000 }, /* WF_UMAC_TOP (PSE) */ - { 0x820cc000, 0x0e000, 0x2000 }, /* WF_UMAC_TOP (PP) */ - { 0x820ce000, 0x21c00, 0x0200 }, /* WF_LMAC_TOP (WF_SEC) */ - { 0x820cf000, 0x22000, 0x1000 }, /* WF_LMAC_TOP (WF_PF) */ - { 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */ - { 0x820e0000, 0x20000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_CFG) */ - { 0x820e1000, 0x20400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_TRB) */ - { 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */ - { 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */ - { 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */ - { 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */ - { 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */ - { 0x820e9000, 0x23400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_WTBLOFF) */ - { 0x820ea000, 0x24000, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_ETBF) */ - { 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */ - { 0x820ec000, 0x24600, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_INT) */ - { 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */ - { 0x820f0000, 0xa0000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_CFG) */ - { 0x820f1000, 0xa0600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_TRB) */ - { 0x820f2000, 0xa0800, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_AGG) */ - { 0x820f3000, 0xa0c00, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_ARB) */ - { 0x820f4000, 0xa1000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_TMAC) */ - { 0x820f5000, 0xa1400, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_RMAC) */ - { 0x820f7000, 0xa1e00, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_DMA) */ - { 0x820f9000, 0xa3400, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_WTBLOFF) */ - { 0x820fa000, 0xa4000, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_ETBF) */ - { 0x820fb000, 0xa4200, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_LPON) */ - { 0x820fc000, 0xa4600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_INT) */ - { 0x820fd000, 0xa4800, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_MIB) */ - }; - int i; - - if (addr < 0x100000) - return addr; - - for (i = 0; i < ARRAY_SIZE(fixed_map); i++) { - u32 ofs; - - if (addr < fixed_map[i].phys) - continue; - - ofs = addr - fixed_map[i].phys; - if (ofs > fixed_map[i].size) - continue; - - return fixed_map[i].mapped + ofs; - } - - if ((addr >= 0x18000000 && addr < 0x18c00000) || - (addr >= 0x70000000 && addr < 0x78000000) || - (addr >= 0x7c000000 && addr < 0x7c400000)) - return mt7921_reg_map_l1(dev, addr); - - dev_err(dev->mt76.dev, "Access currently unsupported address %08x\n", - addr); - - return 0; -} - -static u32 mt7921_rr(struct mt76_dev *mdev, u32 offset) -{ - struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); - u32 addr = __mt7921_reg_addr(dev, offset); - - return dev->bus_ops->rr(mdev, addr); -} - -static void mt7921_wr(struct mt76_dev *mdev, u32 offset, u32 val) -{ - struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); - u32 addr = __mt7921_reg_addr(dev, offset); - - dev->bus_ops->wr(mdev, addr, val); -} - -static u32 mt7921_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val) -{ - struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); - u32 addr = __mt7921_reg_addr(dev, offset); - - return dev->bus_ops->rmw(mdev, addr, mask, val); -} - - static int mt7921_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -214,7 +110,6 @@ static int mt7921_pci_probe(struct pci_dev *pdev, .sta_remove = mt7921_mac_sta_remove, .update_survey = mt7921_update_channel, }; - struct mt76_bus_ops *bus_ops; struct mt7921_dev *dev; struct mt76_dev *mdev; int ret; @@ -250,22 +145,6 @@ static int mt7921_pci_probe(struct pci_dev *pdev, mt76_mmio_init(&dev->mt76, pcim_iomap_table(pdev)[0]); tasklet_init(&dev->irq_tasklet, mt7921_irq_tasklet, (unsigned long)dev); - - dev->bus_ops = dev->mt76.bus; - bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops), - GFP_KERNEL); - if (!bus_ops) - return -ENOMEM; - - bus_ops->rr = mt7921_rr; - bus_ops->wr = mt7921_wr; - bus_ops->rmw = mt7921_rmw; - dev->mt76.bus = bus_ops; - - ret = __mt7921_mcu_drv_pmctrl(dev); - if (ret) - return ret; - mdev->rev = (mt7921_l1_rr(dev, MT_HW_CHIPID) << 16) | (mt7921_l1_rr(dev, MT_HW_REV) & 0xff); dev_err(mdev->dev, "ASIC revision: %04x\n", mdev->rev); From c3bcf1f959f603e1525360523c2fce3a8b7ab3a2 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 15 Jul 2022 12:59:24 -0700 Subject: [PATCH 0770/1056] mt76: mt7921: use physical addr to unify register access commit f1e2eef111018a4f0d280656be4351c37e9e554b upstream. Use physical address to unify the register access and reorder the entries in fixed_map table to accelerate the address lookup for MT7921e. Cosmetics the patch with adding an extra space to make all entries in the array style consistent. Tested-by: Deren Wu Acked-by: Lorenzo Bianconi Signed-off-by: Sean Wang Signed-off-by: Felix Fietkau Signed-off-by: Greg Kroah-Hartman --- .../net/wireless/mediatek/mt76/mt7921/dma.c | 27 ++++++++++--------- .../net/wireless/mediatek/mt76/mt7921/regs.h | 22 +++++++-------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c index 7d7d43a5422f80..f74c385ec80fe3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c @@ -125,36 +125,37 @@ static u32 __mt7921_reg_addr(struct mt7921_dev *dev, u32 addr) u32 mapped; u32 size; } fixed_map[] = { - { 0x00400000, 0x80000, 0x10000}, /* WF_MCU_SYSRAM */ - { 0x00410000, 0x90000, 0x10000}, /* WF_MCU_SYSRAM (configure register) */ - { 0x40000000, 0x70000, 0x10000}, /* WF_UMAC_SYSRAM */ + { 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */ + { 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */ + { 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */ + { 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */ + { 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */ + { 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */ + { 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */ + { 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */ + { 0x00400000, 0x80000, 0x10000 }, /* WF_MCU_SYSRAM */ + { 0x00410000, 0x90000, 0x10000 }, /* WF_MCU_SYSRAM (configure register) */ + { 0x40000000, 0x70000, 0x10000 }, /* WF_UMAC_SYSRAM */ { 0x54000000, 0x02000, 0x1000 }, /* WFDMA PCIE0 MCU DMA0 */ { 0x55000000, 0x03000, 0x1000 }, /* WFDMA PCIE0 MCU DMA1 */ { 0x58000000, 0x06000, 0x1000 }, /* WFDMA PCIE1 MCU DMA0 (MEM_DMA) */ { 0x59000000, 0x07000, 0x1000 }, /* WFDMA PCIE1 MCU DMA1 */ { 0x7c000000, 0xf0000, 0x10000 }, /* CONN_INFRA */ { 0x7c020000, 0xd0000, 0x10000 }, /* CONN_INFRA, WFDMA */ - { 0x7c060000, 0xe0000, 0x10000}, /* CONN_INFRA, conn_host_csr_top */ + { 0x7c060000, 0xe0000, 0x10000 }, /* CONN_INFRA, conn_host_csr_top */ { 0x80020000, 0xb0000, 0x10000 }, /* WF_TOP_MISC_OFF */ { 0x81020000, 0xc0000, 0x10000 }, /* WF_TOP_MISC_ON */ { 0x820c0000, 0x08000, 0x4000 }, /* WF_UMAC_TOP (PLE) */ { 0x820c8000, 0x0c000, 0x2000 }, /* WF_UMAC_TOP (PSE) */ - { 0x820cc000, 0x0e000, 0x2000 }, /* WF_UMAC_TOP (PP) */ + { 0x820cc000, 0x0e000, 0x1000 }, /* WF_UMAC_TOP (PP) */ + { 0x820cd000, 0x0f000, 0x1000 }, /* WF_MDP_TOP */ { 0x820ce000, 0x21c00, 0x0200 }, /* WF_LMAC_TOP (WF_SEC) */ { 0x820cf000, 0x22000, 0x1000 }, /* WF_LMAC_TOP (WF_PF) */ - { 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */ { 0x820e0000, 0x20000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_CFG) */ { 0x820e1000, 0x20400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_TRB) */ - { 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */ - { 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */ - { 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */ - { 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */ - { 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */ { 0x820e9000, 0x23400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_WTBLOFF) */ { 0x820ea000, 0x24000, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_ETBF) */ - { 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */ { 0x820ec000, 0x24600, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_INT) */ - { 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */ { 0x820f0000, 0xa0000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_CFG) */ { 0x820f1000, 0xa0600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_TRB) */ { 0x820f2000, 0xa0800, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_AGG) */ diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h index 41c2855e7a3d39..9266fb3909ca3c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h @@ -14,7 +14,7 @@ #define MT_MCU_INT_EVENT_SER_TRIGGER BIT(2) #define MT_MCU_INT_EVENT_RESET_DONE BIT(3) -#define MT_PLE_BASE 0x8000 +#define MT_PLE_BASE 0x820c0000 #define MT_PLE(ofs) (MT_PLE_BASE + (ofs)) #define MT_PLE_FL_Q0_CTRL MT_PLE(0x3e0) @@ -25,7 +25,7 @@ #define MT_PLE_AC_QEMPTY(_n) MT_PLE(0x500 + 0x40 * (_n)) #define MT_PLE_AMSDU_PACK_MSDU_CNT(n) MT_PLE(0x10e0 + ((n) << 2)) -#define MT_MDP_BASE 0xf000 +#define MT_MDP_BASE 0x820cd000 #define MT_MDP(ofs) (MT_MDP_BASE + (ofs)) #define MT_MDP_DCR0 MT_MDP(0x000) @@ -48,7 +48,7 @@ #define MT_MDP_TO_WM 1 /* TMAC: band 0(0x21000), band 1(0xa1000) */ -#define MT_WF_TMAC_BASE(_band) ((_band) ? 0xa1000 : 0x21000) +#define MT_WF_TMAC_BASE(_band) ((_band) ? 0x820f4000 : 0x820e4000) #define MT_WF_TMAC(_band, ofs) (MT_WF_TMAC_BASE(_band) + (ofs)) #define MT_TMAC_TCR0(_band) MT_WF_TMAC(_band, 0) @@ -73,7 +73,7 @@ #define MT_TMAC_TRCR0(_band) MT_WF_TMAC(_band, 0x09c) #define MT_TMAC_TFCR0(_band) MT_WF_TMAC(_band, 0x1e0) -#define MT_WF_DMA_BASE(_band) ((_band) ? 0xa1e00 : 0x21e00) +#define MT_WF_DMA_BASE(_band) ((_band) ? 0x820f7000 : 0x820e7000) #define MT_WF_DMA(_band, ofs) (MT_WF_DMA_BASE(_band) + (ofs)) #define MT_DMA_DCR0(_band) MT_WF_DMA(_band, 0x000) @@ -81,7 +81,7 @@ #define MT_DMA_DCR0_RXD_G5_EN BIT(23) /* LPON: band 0(0x24200), band 1(0xa4200) */ -#define MT_WF_LPON_BASE(_band) ((_band) ? 0xa4200 : 0x24200) +#define MT_WF_LPON_BASE(_band) ((_band) ? 0x820fb000 : 0x820eb000) #define MT_WF_LPON(_band, ofs) (MT_WF_LPON_BASE(_band) + (ofs)) #define MT_LPON_UTTR0(_band) MT_WF_LPON(_band, 0x080) @@ -92,7 +92,7 @@ #define MT_LPON_TCR_SW_WRITE BIT(0) /* MIB: band 0(0x24800), band 1(0xa4800) */ -#define MT_WF_MIB_BASE(_band) ((_band) ? 0xa4800 : 0x24800) +#define MT_WF_MIB_BASE(_band) ((_band) ? 0x820fd000 : 0x820ed000) #define MT_WF_MIB(_band, ofs) (MT_WF_MIB_BASE(_band) + (ofs)) #define MT_MIB_SCR1(_band) MT_WF_MIB(_band, 0x004) @@ -141,7 +141,7 @@ #define MT_MIB_ARNG(_band, n) MT_WF_MIB(_band, 0x0b0 + ((n) << 2)) #define MT_MIB_ARNCR_RANGE(val, n) (((val) >> ((n) << 3)) & GENMASK(7, 0)) -#define MT_WTBLON_TOP_BASE 0x34000 +#define MT_WTBLON_TOP_BASE 0x820d4000 #define MT_WTBLON_TOP(ofs) (MT_WTBLON_TOP_BASE + (ofs)) #define MT_WTBLON_TOP_WDUCR MT_WTBLON_TOP(0x200) #define MT_WTBLON_TOP_WDUCR_GROUP GENMASK(2, 0) @@ -151,7 +151,7 @@ #define MT_WTBL_UPDATE_ADM_COUNT_CLEAR BIT(12) #define MT_WTBL_UPDATE_BUSY BIT(31) -#define MT_WTBL_BASE 0x38000 +#define MT_WTBL_BASE 0x820d8000 #define MT_WTBL_LMAC_ID GENMASK(14, 8) #define MT_WTBL_LMAC_DW GENMASK(7, 2) #define MT_WTBL_LMAC_OFFS(_id, _dw) (MT_WTBL_BASE | \ @@ -159,7 +159,7 @@ FIELD_PREP(MT_WTBL_LMAC_DW, _dw)) /* AGG: band 0(0x20800), band 1(0xa0800) */ -#define MT_WF_AGG_BASE(_band) ((_band) ? 0xa0800 : 0x20800) +#define MT_WF_AGG_BASE(_band) ((_band) ? 0x820f2000 : 0x820e2000) #define MT_WF_AGG(_band, ofs) (MT_WF_AGG_BASE(_band) + (ofs)) #define MT_AGG_AWSCR0(_band, _n) MT_WF_AGG(_band, 0x05c + (_n) * 4) @@ -190,7 +190,7 @@ #define MT_AGG_ATCR3(_band) MT_WF_AGG(_band, 0x0f4) /* ARB: band 0(0x20c00), band 1(0xa0c00) */ -#define MT_WF_ARB_BASE(_band) ((_band) ? 0xa0c00 : 0x20c00) +#define MT_WF_ARB_BASE(_band) ((_band) ? 0x820f3000 : 0x820e3000) #define MT_WF_ARB(_band, ofs) (MT_WF_ARB_BASE(_band) + (ofs)) #define MT_ARB_SCR(_band) MT_WF_ARB(_band, 0x080) @@ -200,7 +200,7 @@ #define MT_ARB_DRNGR0(_band, _n) MT_WF_ARB(_band, 0x194 + (_n) * 4) /* RMAC: band 0(0x21400), band 1(0xa1400) */ -#define MT_WF_RMAC_BASE(_band) ((_band) ? 0xa1400 : 0x21400) +#define MT_WF_RMAC_BASE(_band) ((_band) ? 0x820f5000 : 0x820e5000) #define MT_WF_RMAC(_band, ofs) (MT_WF_RMAC_BASE(_band) + (ofs)) #define MT_WF_RFCR(_band) MT_WF_RMAC(_band, 0x000) From 22b910a7a378108654784c3625361e334553327c Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 15 Jul 2022 12:59:25 -0700 Subject: [PATCH 0771/1056] mt76: mt7921e: fix possible probe failure after reboot commit 602cc0c9618a819ab00ea3c9400742a0ca318380 upstream. It doesn't guarantee the mt7921e gets started with ASPM L0 after each machine reboot on every platform. If mt7921e gets started with not ASPM L0, it would be possible that the driver encounters time to time failure in mt7921_pci_probe, like a weird chip identifier is read [ 215.514503] mt7921e 0000:05:00.0: ASIC revision: feed0000 [ 216.604741] mt7921e: probe of 0000:05:00.0 failed with error -110 or failing to init hardware because the driver is not allowed to access the register until the device is in ASPM L0 state. So, we call __mt7921e_mcu_drv_pmctrl in early mt7921_pci_probe to force the device to bring back to the L0 state for we can safely access registers in any case. In the patch, we move all functions from dma.c to pci.c and register mt76 bus operation earilier, that is the __mt7921e_mcu_drv_pmctrl depends on. Fixes: bf3747ae2e25 ("mt76: mt7921: enable aspm by default") Reported-by: Kai-Chuan Hsieh Co-developed-by: Deren Wu Signed-off-by: Deren Wu Signed-off-by: Sean Wang Signed-off-by: Felix Fietkau Signed-off-by: Greg Kroah-Hartman --- .../net/wireless/mediatek/mt76/mt7921/dma.c | 116 ----------------- .../net/wireless/mediatek/mt76/mt7921/mcu.c | 18 ++- .../wireless/mediatek/mt76/mt7921/mt7921.h | 1 + .../net/wireless/mediatek/mt76/mt7921/pci.c | 121 ++++++++++++++++++ 4 files changed, 136 insertions(+), 120 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c index f74c385ec80fe3..93d0cc1827d261 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c @@ -118,110 +118,6 @@ static void mt7921_dma_prefetch(struct mt7921_dev *dev) mt76_wr(dev, MT_WFDMA0_TX_RING17_EXT_CTRL, PREFETCH(0x380, 0x4)); } -static u32 __mt7921_reg_addr(struct mt7921_dev *dev, u32 addr) -{ - static const struct { - u32 phys; - u32 mapped; - u32 size; - } fixed_map[] = { - { 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */ - { 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */ - { 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */ - { 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */ - { 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */ - { 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */ - { 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */ - { 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */ - { 0x00400000, 0x80000, 0x10000 }, /* WF_MCU_SYSRAM */ - { 0x00410000, 0x90000, 0x10000 }, /* WF_MCU_SYSRAM (configure register) */ - { 0x40000000, 0x70000, 0x10000 }, /* WF_UMAC_SYSRAM */ - { 0x54000000, 0x02000, 0x1000 }, /* WFDMA PCIE0 MCU DMA0 */ - { 0x55000000, 0x03000, 0x1000 }, /* WFDMA PCIE0 MCU DMA1 */ - { 0x58000000, 0x06000, 0x1000 }, /* WFDMA PCIE1 MCU DMA0 (MEM_DMA) */ - { 0x59000000, 0x07000, 0x1000 }, /* WFDMA PCIE1 MCU DMA1 */ - { 0x7c000000, 0xf0000, 0x10000 }, /* CONN_INFRA */ - { 0x7c020000, 0xd0000, 0x10000 }, /* CONN_INFRA, WFDMA */ - { 0x7c060000, 0xe0000, 0x10000 }, /* CONN_INFRA, conn_host_csr_top */ - { 0x80020000, 0xb0000, 0x10000 }, /* WF_TOP_MISC_OFF */ - { 0x81020000, 0xc0000, 0x10000 }, /* WF_TOP_MISC_ON */ - { 0x820c0000, 0x08000, 0x4000 }, /* WF_UMAC_TOP (PLE) */ - { 0x820c8000, 0x0c000, 0x2000 }, /* WF_UMAC_TOP (PSE) */ - { 0x820cc000, 0x0e000, 0x1000 }, /* WF_UMAC_TOP (PP) */ - { 0x820cd000, 0x0f000, 0x1000 }, /* WF_MDP_TOP */ - { 0x820ce000, 0x21c00, 0x0200 }, /* WF_LMAC_TOP (WF_SEC) */ - { 0x820cf000, 0x22000, 0x1000 }, /* WF_LMAC_TOP (WF_PF) */ - { 0x820e0000, 0x20000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_CFG) */ - { 0x820e1000, 0x20400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_TRB) */ - { 0x820e9000, 0x23400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_WTBLOFF) */ - { 0x820ea000, 0x24000, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_ETBF) */ - { 0x820ec000, 0x24600, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_INT) */ - { 0x820f0000, 0xa0000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_CFG) */ - { 0x820f1000, 0xa0600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_TRB) */ - { 0x820f2000, 0xa0800, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_AGG) */ - { 0x820f3000, 0xa0c00, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_ARB) */ - { 0x820f4000, 0xa1000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_TMAC) */ - { 0x820f5000, 0xa1400, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_RMAC) */ - { 0x820f7000, 0xa1e00, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_DMA) */ - { 0x820f9000, 0xa3400, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_WTBLOFF) */ - { 0x820fa000, 0xa4000, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_ETBF) */ - { 0x820fb000, 0xa4200, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_LPON) */ - { 0x820fc000, 0xa4600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_INT) */ - { 0x820fd000, 0xa4800, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_MIB) */ - }; - int i; - - if (addr < 0x100000) - return addr; - - for (i = 0; i < ARRAY_SIZE(fixed_map); i++) { - u32 ofs; - - if (addr < fixed_map[i].phys) - continue; - - ofs = addr - fixed_map[i].phys; - if (ofs > fixed_map[i].size) - continue; - - return fixed_map[i].mapped + ofs; - } - - if ((addr >= 0x18000000 && addr < 0x18c00000) || - (addr >= 0x70000000 && addr < 0x78000000) || - (addr >= 0x7c000000 && addr < 0x7c400000)) - return mt7921_reg_map_l1(dev, addr); - - dev_err(dev->mt76.dev, "Access currently unsupported address %08x\n", - addr); - - return 0; -} - -static u32 mt7921_rr(struct mt76_dev *mdev, u32 offset) -{ - struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); - u32 addr = __mt7921_reg_addr(dev, offset); - - return dev->bus_ops->rr(mdev, addr); -} - -static void mt7921_wr(struct mt76_dev *mdev, u32 offset, u32 val) -{ - struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); - u32 addr = __mt7921_reg_addr(dev, offset); - - dev->bus_ops->wr(mdev, addr, val); -} - -static u32 mt7921_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val) -{ - struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); - u32 addr = __mt7921_reg_addr(dev, offset); - - return dev->bus_ops->rmw(mdev, addr, mask, val); -} - static int mt7921_dma_disable(struct mt7921_dev *dev, bool force) { if (force) { @@ -381,20 +277,8 @@ int mt7921_wpdma_reinit_cond(struct mt7921_dev *dev) int mt7921_dma_init(struct mt7921_dev *dev) { - struct mt76_bus_ops *bus_ops; int ret; - dev->bus_ops = dev->mt76.bus; - bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops), - GFP_KERNEL); - if (!bus_ops) - return -ENOMEM; - - bus_ops->rr = mt7921_rr; - bus_ops->wr = mt7921_wr; - bus_ops->rmw = mt7921_rmw; - dev->mt76.bus = bus_ops; - mt76_dma_attach(&dev->mt76); ret = mt7921_dma_disable(dev, true); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c index 391bbb114750c4..9b490ff36bd6bc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c @@ -1306,10 +1306,8 @@ int mt7921_mcu_sta_update(struct mt7921_dev *dev, struct ieee80211_sta *sta, return mt76_connac_mcu_sta_cmd(&dev->mphy, &info); } -int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev) +int __mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev) { - struct mt76_phy *mphy = &dev->mt76.phy; - struct mt76_connac_pm *pm = &dev->pm; int i, err = 0; for (i = 0; i < MT7921_DRV_OWN_RETRY_COUNT; i++) { @@ -1322,9 +1320,21 @@ int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev) if (i == MT7921_DRV_OWN_RETRY_COUNT) { dev_err(dev->mt76.dev, "driver own failed\n"); err = -EIO; - goto out; } + return err; +} + +int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev) +{ + struct mt76_phy *mphy = &dev->mt76.phy; + struct mt76_connac_pm *pm = &dev->pm; + int err; + + err = __mt7921e_mcu_drv_pmctrl(dev); + if (err < 0) + goto out; + mt7921_wpdma_reinit_cond(dev); clear_bit(MT76_STATE_PM, &mphy->state); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h index 32d4f2cab94e2f..6eb03d6705a1f6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h @@ -374,6 +374,7 @@ int mt7921_mcu_uni_rx_ba(struct mt7921_dev *dev, bool enable); void mt7921_scan_work(struct work_struct *work); int mt7921_mcu_uni_bss_ps(struct mt7921_dev *dev, struct ieee80211_vif *vif); +int __mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev); int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev); int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev); int mt7921_mcu_fw_pmctrl(struct mt7921_dev *dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index c3905bcab36047..815b926e52e9e5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -88,6 +88,110 @@ static void mt7921_irq_tasklet(unsigned long data) napi_schedule(&dev->mt76.napi[MT_RXQ_MAIN]); } +static u32 __mt7921_reg_addr(struct mt7921_dev *dev, u32 addr) +{ + static const struct { + u32 phys; + u32 mapped; + u32 size; + } fixed_map[] = { + { 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */ + { 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */ + { 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */ + { 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */ + { 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */ + { 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */ + { 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */ + { 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */ + { 0x00400000, 0x80000, 0x10000 }, /* WF_MCU_SYSRAM */ + { 0x00410000, 0x90000, 0x10000 }, /* WF_MCU_SYSRAM (configure register) */ + { 0x40000000, 0x70000, 0x10000 }, /* WF_UMAC_SYSRAM */ + { 0x54000000, 0x02000, 0x1000 }, /* WFDMA PCIE0 MCU DMA0 */ + { 0x55000000, 0x03000, 0x1000 }, /* WFDMA PCIE0 MCU DMA1 */ + { 0x58000000, 0x06000, 0x1000 }, /* WFDMA PCIE1 MCU DMA0 (MEM_DMA) */ + { 0x59000000, 0x07000, 0x1000 }, /* WFDMA PCIE1 MCU DMA1 */ + { 0x7c000000, 0xf0000, 0x10000 }, /* CONN_INFRA */ + { 0x7c020000, 0xd0000, 0x10000 }, /* CONN_INFRA, WFDMA */ + { 0x7c060000, 0xe0000, 0x10000 }, /* CONN_INFRA, conn_host_csr_top */ + { 0x80020000, 0xb0000, 0x10000 }, /* WF_TOP_MISC_OFF */ + { 0x81020000, 0xc0000, 0x10000 }, /* WF_TOP_MISC_ON */ + { 0x820c0000, 0x08000, 0x4000 }, /* WF_UMAC_TOP (PLE) */ + { 0x820c8000, 0x0c000, 0x2000 }, /* WF_UMAC_TOP (PSE) */ + { 0x820cc000, 0x0e000, 0x1000 }, /* WF_UMAC_TOP (PP) */ + { 0x820cd000, 0x0f000, 0x1000 }, /* WF_MDP_TOP */ + { 0x820ce000, 0x21c00, 0x0200 }, /* WF_LMAC_TOP (WF_SEC) */ + { 0x820cf000, 0x22000, 0x1000 }, /* WF_LMAC_TOP (WF_PF) */ + { 0x820e0000, 0x20000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_CFG) */ + { 0x820e1000, 0x20400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_TRB) */ + { 0x820e9000, 0x23400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_WTBLOFF) */ + { 0x820ea000, 0x24000, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_ETBF) */ + { 0x820ec000, 0x24600, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_INT) */ + { 0x820f0000, 0xa0000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_CFG) */ + { 0x820f1000, 0xa0600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_TRB) */ + { 0x820f2000, 0xa0800, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_AGG) */ + { 0x820f3000, 0xa0c00, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_ARB) */ + { 0x820f4000, 0xa1000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_TMAC) */ + { 0x820f5000, 0xa1400, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_RMAC) */ + { 0x820f7000, 0xa1e00, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_DMA) */ + { 0x820f9000, 0xa3400, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_WTBLOFF) */ + { 0x820fa000, 0xa4000, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_ETBF) */ + { 0x820fb000, 0xa4200, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_LPON) */ + { 0x820fc000, 0xa4600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_INT) */ + { 0x820fd000, 0xa4800, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_MIB) */ + }; + int i; + + if (addr < 0x100000) + return addr; + + for (i = 0; i < ARRAY_SIZE(fixed_map); i++) { + u32 ofs; + + if (addr < fixed_map[i].phys) + continue; + + ofs = addr - fixed_map[i].phys; + if (ofs > fixed_map[i].size) + continue; + + return fixed_map[i].mapped + ofs; + } + + if ((addr >= 0x18000000 && addr < 0x18c00000) || + (addr >= 0x70000000 && addr < 0x78000000) || + (addr >= 0x7c000000 && addr < 0x7c400000)) + return mt7921_reg_map_l1(dev, addr); + + dev_err(dev->mt76.dev, "Access currently unsupported address %08x\n", + addr); + + return 0; +} + +static u32 mt7921_rr(struct mt76_dev *mdev, u32 offset) +{ + struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); + u32 addr = __mt7921_reg_addr(dev, offset); + + return dev->bus_ops->rr(mdev, addr); +} + +static void mt7921_wr(struct mt76_dev *mdev, u32 offset, u32 val) +{ + struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); + u32 addr = __mt7921_reg_addr(dev, offset); + + dev->bus_ops->wr(mdev, addr, val); +} + +static u32 mt7921_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val) +{ + struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); + u32 addr = __mt7921_reg_addr(dev, offset); + + return dev->bus_ops->rmw(mdev, addr, mask, val); +} + static int mt7921_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -110,6 +214,7 @@ static int mt7921_pci_probe(struct pci_dev *pdev, .sta_remove = mt7921_mac_sta_remove, .update_survey = mt7921_update_channel, }; + struct mt76_bus_ops *bus_ops; struct mt7921_dev *dev; struct mt76_dev *mdev; int ret; @@ -145,6 +250,22 @@ static int mt7921_pci_probe(struct pci_dev *pdev, mt76_mmio_init(&dev->mt76, pcim_iomap_table(pdev)[0]); tasklet_init(&dev->irq_tasklet, mt7921_irq_tasklet, (unsigned long)dev); + + dev->bus_ops = dev->mt76.bus; + bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops), + GFP_KERNEL); + if (!bus_ops) + return -ENOMEM; + + bus_ops->rr = mt7921_rr; + bus_ops->wr = mt7921_wr; + bus_ops->rmw = mt7921_rmw; + dev->mt76.bus = bus_ops; + + ret = __mt7921e_mcu_drv_pmctrl(dev); + if (ret) + return ret; + mdev->rev = (mt7921_l1_rr(dev, MT_HW_CHIPID) << 16) | (mt7921_l1_rr(dev, MT_HW_REV) & 0xff); dev_err(mdev->dev, "ASIC revision: %04x\n", mdev->rev); From cf719adb6fa91d49ef6009d1fab624dc0830ebcd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 15 Jul 2022 12:59:26 -0700 Subject: [PATCH 0772/1056] mt76: mt7921: Fix the error handling path of mt7921_pci_probe() commit 4e90db5e21eb3bb272fe47386dc3506755e209e9 upstream. In case of error, some resources must be freed, as already done above and below the devm_kmemdup() and __mt7921e_mcu_drv_pmctrl() calls added in the commit in Fixes:. Fixes: 602cc0c9618a ("mt76: mt7921e: fix possible probe failure after reboot") Signed-off-by: Christophe JAILLET Signed-off-by: Felix Fietkau Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index 815b926e52e9e5..36e6495ae6588a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -254,8 +254,10 @@ static int mt7921_pci_probe(struct pci_dev *pdev, dev->bus_ops = dev->mt76.bus; bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops), GFP_KERNEL); - if (!bus_ops) - return -ENOMEM; + if (!bus_ops) { + ret = -ENOMEM; + goto err_free_dev; + } bus_ops->rr = mt7921_rr; bus_ops->wr = mt7921_wr; @@ -264,7 +266,7 @@ static int mt7921_pci_probe(struct pci_dev *pdev, ret = __mt7921e_mcu_drv_pmctrl(dev); if (ret) - return ret; + goto err_free_dev; mdev->rev = (mt7921_l1_rr(dev, MT_HW_CHIPID) << 16) | (mt7921_l1_rr(dev, MT_HW_REV) & 0xff); From 67cb74213a5216c4452a78a0dfbb5a3724c1b706 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 21 Jul 2022 14:36:05 -0700 Subject: [PATCH 0773/1056] xfs: fix maxlevels comparisons in the btree staging code [ Upstream commit 78e8ec83a404d63dcc86b251f42e4ee8aff27465 ] The btree geometry computation function has an off-by-one error in that it does not allow maximally tall btrees (nlevels == XFS_BTREE_MAXLEVELS). This can result in repairs failing unnecessarily on very fragmented filesystems. Subsequent patches to remove MAXLEVELS usage in favor of the per-btree type computations will make this a much more likely occurrence. Signed-off-by: Darrick J. Wong Reviewed-by: Chandan Babu R Reviewed-by: Christoph Hellwig Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_btree_staging.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c index ac9e80152b5cff..89c8a1498df1d3 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.c +++ b/fs/xfs/libxfs/xfs_btree_staging.c @@ -662,7 +662,7 @@ xfs_btree_bload_compute_geometry( xfs_btree_bload_ensure_slack(cur, &bbl->node_slack, 1); bbl->nr_records = nr_this_level = nr_records; - for (cur->bc_nlevels = 1; cur->bc_nlevels < XFS_BTREE_MAXLEVELS;) { + for (cur->bc_nlevels = 1; cur->bc_nlevels <= XFS_BTREE_MAXLEVELS;) { uint64_t level_blocks; uint64_t dontcare64; unsigned int level = cur->bc_nlevels - 1; @@ -724,7 +724,7 @@ xfs_btree_bload_compute_geometry( nr_this_level = level_blocks; } - if (cur->bc_nlevels == XFS_BTREE_MAXLEVELS) + if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) return -EOVERFLOW; bbl->btree_height = cur->bc_nlevels; From 4d6f22f6cc58affc9803b7030835ff68c6987802 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 21 Jul 2022 14:36:06 -0700 Subject: [PATCH 0774/1056] xfs: fold perag loop iteration logic into helper function [ Upstream commit bf2307b195135ed9c95eebb38920d8bd41843092 ] Fold the loop iteration logic into a helper in preparation for further fixups. No functional change in this patch. [backport: dependency for f1788b5e5ee25bedf00bb4d25f82b93820d61189] Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_ag.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 4c6f9045baca07..ddb89e10b6ead0 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -124,12 +124,22 @@ void xfs_perag_put(struct xfs_perag *pag); * for_each_perag_from() because they terminate at sb_agcount where there are * no perag structures in tree beyond end_agno. */ +static inline struct xfs_perag * +xfs_perag_next( + struct xfs_perag *pag, + xfs_agnumber_t *next_agno) +{ + struct xfs_mount *mp = pag->pag_mount; + + *next_agno = pag->pag_agno + 1; + xfs_perag_put(pag); + return xfs_perag_get(mp, *next_agno); +} + #define for_each_perag_range(mp, next_agno, end_agno, pag) \ for ((pag) = xfs_perag_get((mp), (next_agno)); \ (pag) != NULL && (next_agno) <= (end_agno); \ - (next_agno) = (pag)->pag_agno + 1, \ - xfs_perag_put(pag), \ - (pag) = xfs_perag_get((mp), (next_agno))) + (pag) = xfs_perag_next((pag), &(next_agno))) #define for_each_perag_from(mp, next_agno, pag) \ for_each_perag_range((mp), (next_agno), (mp)->m_sb.sb_agcount, (pag)) From 2991d51b42a38b730b2309c9e699345892dd56d2 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 21 Jul 2022 14:36:07 -0700 Subject: [PATCH 0775/1056] xfs: rename the next_agno perag iteration variable [ Upstream commit f1788b5e5ee25bedf00bb4d25f82b93820d61189 ] Rename the next_agno variable to be consistent across the several iteration macros and shorten line length. [backport: dependency for 8ed004eb9d07a5d6114db3e97a166707c186262d] Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_ag.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index ddb89e10b6ead0..134e8635dee116 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -127,22 +127,22 @@ void xfs_perag_put(struct xfs_perag *pag); static inline struct xfs_perag * xfs_perag_next( struct xfs_perag *pag, - xfs_agnumber_t *next_agno) + xfs_agnumber_t *agno) { struct xfs_mount *mp = pag->pag_mount; - *next_agno = pag->pag_agno + 1; + *agno = pag->pag_agno + 1; xfs_perag_put(pag); - return xfs_perag_get(mp, *next_agno); + return xfs_perag_get(mp, *agno); } -#define for_each_perag_range(mp, next_agno, end_agno, pag) \ - for ((pag) = xfs_perag_get((mp), (next_agno)); \ - (pag) != NULL && (next_agno) <= (end_agno); \ - (pag) = xfs_perag_next((pag), &(next_agno))) +#define for_each_perag_range(mp, agno, end_agno, pag) \ + for ((pag) = xfs_perag_get((mp), (agno)); \ + (pag) != NULL && (agno) <= (end_agno); \ + (pag) = xfs_perag_next((pag), &(agno))) -#define for_each_perag_from(mp, next_agno, pag) \ - for_each_perag_range((mp), (next_agno), (mp)->m_sb.sb_agcount, (pag)) +#define for_each_perag_from(mp, agno, pag) \ + for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount, (pag)) #define for_each_perag(mp, agno, pag) \ From 768bfde1cf3e436393be175697cf02eb889ab46f Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 21 Jul 2022 14:36:08 -0700 Subject: [PATCH 0776/1056] xfs: terminate perag iteration reliably on agcount [ Upstream commit 8ed004eb9d07a5d6114db3e97a166707c186262d ] The for_each_perag_from() iteration macro relies on sb_agcount to process every perag currently within EOFS from a given starting point. It's perfectly valid to have perag structures beyond sb_agcount, however, such as if a growfs is in progress. If a perag loop happens to race with growfs in this manner, it will actually attempt to process the post-EOFS perag where ->pag_agno == sb_agcount. This is reproduced by xfs/104 and manifests as the following assert failure in superblock write verifier context: XFS: Assertion failed: agno < mp->m_sb.sb_agcount, file: fs/xfs/libxfs/xfs_types.c, line: 22 Update the corresponding macro to only process perags that are within the current sb_agcount. Fixes: 58d43a7e3263 ("xfs: pass perags around in fsmap data dev functions") Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_ag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 134e8635dee116..4585ebb3f45009 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -142,7 +142,7 @@ xfs_perag_next( (pag) = xfs_perag_next((pag), &(agno))) #define for_each_perag_from(mp, agno, pag) \ - for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount, (pag)) + for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag)) #define for_each_perag(mp, agno, pag) \ From 1da0b50ea07114fe1fe85ca126f8df1065633d59 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 21 Jul 2022 14:36:09 -0700 Subject: [PATCH 0777/1056] xfs: fix perag reference leak on iteration race with growfs [ Upstream commit 892a666fafa19ab04b5e948f6c92f98f1dafb489 ] The for_each_perag*() set of macros are hacky in that some (i.e. those based on sb_agcount) rely on the assumption that perag iteration terminates naturally with a NULL perag at the specified end_agno. Others allow for the final AG to have a valid perag and require the calling function to clean up any potential leftover xfs_perag reference on termination of the loop. Aside from providing a subtly inconsistent interface, the former variant is racy with growfs because growfs can create discoverable post-eofs perags before the final superblock update that completes the grow operation and increases sb_agcount. This leads to the following assert failure (reproduced by xfs/104) in the perag free path during unmount: XFS: Assertion failed: atomic_read(&pag->pag_ref) == 0, file: fs/xfs/libxfs/xfs_ag.c, line: 195 This occurs because one of the many for_each_perag() loops in the code that is expected to terminate with a NULL pag (and thus has no post-loop xfs_perag_put() check) raced with a growfs and found a non-NULL post-EOFS perag, but terminated naturally based on the end_agno check without releasing the post-EOFS perag. Rework the iteration logic to lift the agno check from the main for loop conditional to the iteration helper function. The for loop now purely terminates on a NULL pag and xfs_perag_next() avoids taking a reference to any perag beyond end_agno in the first place. Fixes: f250eedcf762 ("xfs: make for_each_perag... a first class citizen") Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_ag.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 4585ebb3f45009..3f597cad2c3334 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -116,30 +116,26 @@ void xfs_perag_put(struct xfs_perag *pag); /* * Perag iteration APIs - * - * XXX: for_each_perag_range() usage really needs an iterator to clean up when - * we terminate at end_agno because we may have taken a reference to the perag - * beyond end_agno. Right now callers have to be careful to catch and clean that - * up themselves. This is not necessary for the callers of for_each_perag() and - * for_each_perag_from() because they terminate at sb_agcount where there are - * no perag structures in tree beyond end_agno. */ static inline struct xfs_perag * xfs_perag_next( struct xfs_perag *pag, - xfs_agnumber_t *agno) + xfs_agnumber_t *agno, + xfs_agnumber_t end_agno) { struct xfs_mount *mp = pag->pag_mount; *agno = pag->pag_agno + 1; xfs_perag_put(pag); + if (*agno > end_agno) + return NULL; return xfs_perag_get(mp, *agno); } #define for_each_perag_range(mp, agno, end_agno, pag) \ for ((pag) = xfs_perag_get((mp), (agno)); \ - (pag) != NULL && (agno) <= (end_agno); \ - (pag) = xfs_perag_next((pag), &(agno))) + (pag) != NULL; \ + (pag) = xfs_perag_next((pag), &(agno), (end_agno))) #define for_each_perag_from(mp, agno, pag) \ for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag)) From 44addae95ed747f270fa74c7e90036854203e37b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 Jul 2022 14:36:10 -0700 Subject: [PATCH 0778/1056] xfs: prevent a WARN_ONCE() in xfs_ioc_attr_list() [ Upstream commit 6ed6356b07714e0198be3bc3ecccc8b40a212de4 ] The "bufsize" comes from the root user. If "bufsize" is negative then, because of type promotion, neither of the validation checks at the start of the function are able to catch it: if (bufsize < sizeof(struct xfs_attrlist) || bufsize > XFS_XATTR_LIST_MAX) return -EINVAL; This means "bufsize" will trigger (WARN_ON_ONCE(size > INT_MAX)) in kvmalloc_node(). Fix this by changing the type from int to size_t. Signed-off-by: Dan Carpenter Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_ioctl.c | 2 +- fs/xfs/xfs_ioctl.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 09269f478df9c7..fba52e75e98b6f 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -372,7 +372,7 @@ int xfs_ioc_attr_list( struct xfs_inode *dp, void __user *ubuf, - int bufsize, + size_t bufsize, int flags, struct xfs_attrlist_cursor __user *ucursor) { diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h index 28453a6d446185..845d3bcab74b40 100644 --- a/fs/xfs/xfs_ioctl.h +++ b/fs/xfs/xfs_ioctl.h @@ -38,8 +38,9 @@ xfs_readlink_by_handle( int xfs_ioc_attrmulti_one(struct file *parfilp, struct inode *inode, uint32_t opcode, void __user *uname, void __user *value, uint32_t *len, uint32_t flags); -int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, int bufsize, - int flags, struct xfs_attrlist_cursor __user *ucursor); +int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, + size_t bufsize, int flags, + struct xfs_attrlist_cursor __user *ucursor); extern struct dentry * xfs_handle_to_dentry( From 6bf450d92d0c5673ea5a7934083758c579a7fef0 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Mon, 18 Jul 2022 16:21:20 +0800 Subject: [PATCH 0779/1056] r8152: fix a WOL issue commit cdf0b86b250fd3c1c3e120c86583ea510c52e4ce upstream. This fixes that the platform is waked by an unexpected packet. The size and range of FIFO is different when the device enters S3 state, so it is necessary to correct some settings when suspending. Regardless of jumbo frame, set RMS to 1522 and MTPS to MTPS_DEFAULT. Besides, enable MCU_BORW_EN to update the method of calculating the pointer of data. Then, the hardware could get the correct data. Fixes: 195aae321c82 ("r8152: support new chips") Signed-off-by: Hayes Wang Link: https://lore.kernel.org/r/20220718082120.10957-391-nic_swsd@realtek.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/r8152.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d467a9f3bb44d4..0d1d92ef790990 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "12" /* Information for net */ -#define NET_VERSION "12" +#define NET_VERSION "13" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -5915,7 +5915,8 @@ static void r8153_enter_oob(struct r8152 *tp) wait_oob_link_list_ready(tp); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, mtu_to_size(tp->netdev->mtu)); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522); + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT); switch (tp->version) { case RTL_VER_03: @@ -5951,6 +5952,10 @@ static void r8153_enter_oob(struct r8152 *tp) ocp_data |= NOW_IS_OOB | DIS_MCU_CLROOB; ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); + ocp_data |= MCU_BORW_EN; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + rxdy_gated_en(tp, false); ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); @@ -6553,6 +6558,9 @@ static void rtl8156_down(struct r8152 *tp) rtl_disable(tp); rtl_reset_bmu(tp); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522); + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT); + /* Clear teredo wake event. bit[15:8] is the teredo wakeup * type. Set it to zero. bits[7:0] are the W1C bits about * the events. Set them to all 1 to clear them. @@ -6563,6 +6571,10 @@ static void rtl8156_down(struct r8152 *tp) ocp_data |= NOW_IS_OOB; ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); + ocp_data |= MCU_BORW_EN; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + rtl_rx_vlan_en(tp, true); rxdy_gated_en(tp, false); From 1d3eeb199970297305af56686eebf75da5ade7f5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 13:51:51 -0700 Subject: [PATCH 0780/1056] ip: Fix data-races around sysctl_ip_default_ttl. commit 8281b7ec5c56b71cb2cc5a1728b41607be66959c upstream. While reading sysctl_ip_default_ttl, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/netronome/nfp/flower/action.c | 2 +- include/net/route.h | 2 +- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/netfilter/nf_reject_ipv4.c | 4 ++-- net/ipv4/proc.c | 2 +- net/netfilter/nf_synproxy_core.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 2a432de11858da..df5a6a0bf1d5dc 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -472,7 +472,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, set_tun->ttl = ip4_dst_hoplimit(&rt->dst); ip_rt_put(rt); } else { - set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; + set_tun->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); } } diff --git a/include/net/route.h b/include/net/route.h index 2551f3f03b37ee..30610101ea14ff 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -360,7 +360,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) struct net *net = dev_net(dst->dev); if (hoplimit == 0) - hoplimit = net->ipv4.sysctl_ip_default_ttl; + hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); return hoplimit; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b297bb28556ec5..8268e427f8893d 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1597,7 +1597,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, { struct net *net = sock_net(sk); val = (inet->uc_ttl == -1 ? - net->ipv4.sysctl_ip_default_ttl : + READ_ONCE(net->ipv4.sysctl_ip_default_ttl) : inet->uc_ttl); break; } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 4eed5afca392e1..f2edb40c0db00b 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -62,7 +62,7 @@ struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, - net->ipv4.sysctl_ip_default_ttl); + READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); nf_reject_ip_tcphdr_put(nskb, oldskb, oth); niph->tot_len = htons(nskb->len); ip_send_check(niph); @@ -115,7 +115,7 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, - net->ipv4.sysctl_ip_default_ttl); + READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); skb_reset_transport_header(nskb); icmph = skb_put_zero(nskb, sizeof(struct icmphdr)); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f30273afb5399d..4b9280a3b67325 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -387,7 +387,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) seq_printf(seq, "\nIp: %d %d", IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, - net->ipv4.sysctl_ip_default_ttl); + READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 2dfc5dae065638..049a88f0380117 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -427,7 +427,7 @@ synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr, iph->tos = 0; iph->id = 0; iph->frag_off = htons(IP_DF); - iph->ttl = net->ipv4.sysctl_ip_default_ttl; + iph->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); iph->protocol = IPPROTO_TCP; iph->check = 0; iph->saddr = saddr; From c8e32bca0676ac663266a3b16562cb017300adcd Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Wed, 1 Jun 2022 14:46:25 +0800 Subject: [PATCH 0781/1056] xfrm: xfrm_policy: fix a possible double xfrm_pols_put() in xfrm_bundle_lookup() [ Upstream commit f85daf0e725358be78dfd208dea5fd665d8cb901 ] xfrm_policy_lookup() will call xfrm_pol_hold_rcu() to get a refcount of pols[0]. This refcount can be dropped in xfrm_expand_policies() when xfrm_expand_policies() return error. pols[0]'s refcount is balanced in here. But xfrm_bundle_lookup() will also call xfrm_pols_put() with num_pols == 1 to drop this refcount when xfrm_expand_policies() return error. This patch also fix an illegal address access. pols[0] will save a error point when xfrm_policy_lookup fails. This lead to xfrm_pols_put to resolve an illegal address in xfrm_bundle_lookup's error path. Fix these by setting num_pols = 0 in xfrm_expand_policies()'s error path. Fixes: 80c802f3073e ("xfrm: cache bundles instead of policies for outgoing flows") Signed-off-by: Hangyu Hua Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_policy.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index a6271b955e11d7..fb198f9490a0fe 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2678,8 +2678,10 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, *num_xfrms = 0; return 0; } - if (IS_ERR(pols[0])) + if (IS_ERR(pols[0])) { + *num_pols = 0; return PTR_ERR(pols[0]); + } *num_xfrms = pols[0]->xfrm_nr; @@ -2694,6 +2696,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, if (pols[1]) { if (IS_ERR(pols[1])) { xfrm_pols_put(pols, *num_pols); + *num_pols = 0; return PTR_ERR(pols[1]); } (*num_pols)++; From 71ab83ac65e2d671552374123bf920c1d698335a Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 23 May 2022 18:10:09 +0400 Subject: [PATCH 0782/1056] power/reset: arm-versatile: Fix refcount leak in versatile_reboot_probe [ Upstream commit 80192eff64eee9b3bc0594a47381937b94b9d65a ] of_find_matching_node_and_match() returns a node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. Add missing of_node_put() to avoid refcount leak. Fixes: 0e545f57b708 ("power: reset: driver for the Versatile syscon reboot") Signed-off-by: Miaoqian Lin Reviewed-by: Linus Walleij Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/reset/arm-versatile-reboot.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/reset/arm-versatile-reboot.c b/drivers/power/reset/arm-versatile-reboot.c index 08d0a07b58ef28..c7624d7611a7ed 100644 --- a/drivers/power/reset/arm-versatile-reboot.c +++ b/drivers/power/reset/arm-versatile-reboot.c @@ -146,6 +146,7 @@ static int __init versatile_reboot_probe(void) versatile_reboot_type = (enum versatile_reboot)reboot_id->data; syscon_regmap = syscon_node_to_regmap(np); + of_node_put(np); if (IS_ERR(syscon_regmap)) return PTR_ERR(syscon_regmap); From 03fd151bdff0d27dccaf1586a6232c5d4bc82520 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 5 Jul 2022 18:08:36 -0500 Subject: [PATCH 0783/1056] RDMA/irdma: Do not advertise 1GB page size for x722 [ Upstream commit 5e8afb8792f3b6ae7ccf700f8c19225382636401 ] x722 does not support 1GB page size but the irdma driver incorrectly advertises 1GB page size support for x722 device to ib_core to compute the best page size to use on this MR. This could lead to incorrect start offsets computed by hardware on the MR. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/i40iw_hw.c | 1 + drivers/infiniband/hw/irdma/icrdma_hw.c | 1 + drivers/infiniband/hw/irdma/irdma.h | 1 + drivers/infiniband/hw/irdma/verbs.c | 4 ++-- 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.c b/drivers/infiniband/hw/irdma/i40iw_hw.c index 64148ad8a604ec..040d4e2b97676d 100644 --- a/drivers/infiniband/hw/irdma/i40iw_hw.c +++ b/drivers/infiniband/hw/irdma/i40iw_hw.c @@ -202,6 +202,7 @@ void i40iw_init_hw(struct irdma_sc_dev *dev) dev->hw_attrs.uk_attrs.max_hw_read_sges = I40IW_MAX_SGE_RD; dev->hw_attrs.max_hw_device_pages = I40IW_MAX_PUSH_PAGE_COUNT; dev->hw_attrs.uk_attrs.max_hw_inline = I40IW_MAX_INLINE_DATA_SIZE; + dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M; dev->hw_attrs.max_hw_ird = I40IW_MAX_IRD_SIZE; dev->hw_attrs.max_hw_ord = I40IW_MAX_ORD_SIZE; dev->hw_attrs.max_hw_wqes = I40IW_MAX_WQ_ENTRIES; diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.c b/drivers/infiniband/hw/irdma/icrdma_hw.c index cf53b17510cdb2..5986fd906308cc 100644 --- a/drivers/infiniband/hw/irdma/icrdma_hw.c +++ b/drivers/infiniband/hw/irdma/icrdma_hw.c @@ -139,6 +139,7 @@ void icrdma_init_hw(struct irdma_sc_dev *dev) dev->cqp_db = dev->hw_regs[IRDMA_CQPDB]; dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK]; dev->irq_ops = &icrdma_irq_ops; + dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G; dev->hw_attrs.max_hw_ird = ICRDMA_MAX_IRD_SIZE; dev->hw_attrs.max_hw_ord = ICRDMA_MAX_ORD_SIZE; dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT; diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h index 46c12334c73546..4789e85d717b3e 100644 --- a/drivers/infiniband/hw/irdma/irdma.h +++ b/drivers/infiniband/hw/irdma/irdma.h @@ -127,6 +127,7 @@ struct irdma_hw_attrs { u64 max_hw_outbound_msg_size; u64 max_hw_inbound_msg_size; u64 max_mr_size; + u64 page_size_cap; u32 min_hw_qp_id; u32 min_hw_aeq_size; u32 max_hw_aeq_size; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 8a3ac4257e8679..0eef4642869136 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -29,7 +29,7 @@ static int irdma_query_device(struct ib_device *ibdev, props->vendor_part_id = pcidev->device; props->hw_ver = rf->pcidev->revision; - props->page_size_cap = SZ_4K | SZ_2M | SZ_1G; + props->page_size_cap = hw_attrs->page_size_cap; props->max_mr_size = hw_attrs->max_mr_size; props->max_qp = rf->max_qp - rf->used_qps; props->max_qp_wr = hw_attrs->max_qp_wr; @@ -2776,7 +2776,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) { iwmr->page_size = ib_umem_find_best_pgsz(region, - SZ_4K | SZ_2M | SZ_1G, + iwdev->rf->sc_dev.hw_attrs.page_size_cap, virt); if (unlikely(!iwmr->page_size)) { kfree(iwmr); From a4c5115140ed1833197bad9a6b80265840ff427f Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 5 Jul 2022 18:08:37 -0500 Subject: [PATCH 0784/1056] RDMA/irdma: Fix sleep from invalid context BUG [ Upstream commit cc0315564d6eec91c716d314b743321be24c70b3 ] Taking the qos_mutex to process RoCEv2 QP's on netdev events causes a kernel splat. Fix this by removing the handling for RoCEv2 in irdma_cm_teardown_connections that uses the mutex. This handling is only needed for iWARP to avoid having connections established while the link is down or having connections remain functional after the IP address is removed. BUG: sleeping function called from invalid context at kernel/locking/mutex. Call Trace: kernel: dump_stack+0x66/0x90 kernel: ___might_sleep.cold.92+0x8d/0x9a kernel: mutex_lock+0x1c/0x40 kernel: irdma_cm_teardown_connections+0x28e/0x4d0 [irdma] kernel: ? check_preempt_curr+0x7a/0x90 kernel: ? select_idle_sibling+0x22/0x3c0 kernel: ? select_task_rq_fair+0x94c/0xc90 kernel: ? irdma_exec_cqp_cmd+0xc27/0x17c0 [irdma] kernel: ? __wake_up_common+0x7a/0x190 kernel: irdma_if_notify+0x3cc/0x450 [irdma] kernel: ? sched_clock_cpu+0xc/0xb0 kernel: irdma_inet6addr_event+0xc6/0x150 [irdma] Fixes: 146b9756f14c ("RDMA/irdma: Add connection manager") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/cm.c | 50 -------------------------------- 1 file changed, 50 deletions(-) diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 632f65e53b63f1..60d4e9c151ff75 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -4221,10 +4221,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, struct irdma_cm_node *cm_node; struct list_head teardown_list; struct ib_qp_attr attr; - struct irdma_sc_vsi *vsi = &iwdev->vsi; - struct irdma_sc_qp *sc_qp; - struct irdma_qp *qp; - int i; INIT_LIST_HEAD(&teardown_list); @@ -4241,52 +4237,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, irdma_cm_disconn(cm_node->iwqp); irdma_rem_ref_cm_node(cm_node); } - if (!iwdev->roce_mode) - return; - - INIT_LIST_HEAD(&teardown_list); - for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { - mutex_lock(&vsi->qos[i].qos_mutex); - list_for_each_safe (list_node, list_core_temp, - &vsi->qos[i].qplist) { - u32 qp_ip[4]; - - sc_qp = container_of(list_node, struct irdma_sc_qp, - list); - if (sc_qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_RC) - continue; - - qp = sc_qp->qp_uk.back_qp; - if (!disconnect_all) { - if (nfo->ipv4) - qp_ip[0] = qp->udp_info.local_ipaddr[3]; - else - memcpy(qp_ip, - &qp->udp_info.local_ipaddr[0], - sizeof(qp_ip)); - } - - if (disconnect_all || - (nfo->vlan_id == (qp->udp_info.vlan_tag & VLAN_VID_MASK) && - !memcmp(qp_ip, ipaddr, nfo->ipv4 ? 4 : 16))) { - spin_lock(&iwdev->rf->qptable_lock); - if (iwdev->rf->qp_table[sc_qp->qp_uk.qp_id]) { - irdma_qp_add_ref(&qp->ibqp); - list_add(&qp->teardown_entry, - &teardown_list); - } - spin_unlock(&iwdev->rf->qptable_lock); - } - } - mutex_unlock(&vsi->qos[i].qos_mutex); - } - - list_for_each_safe (list_node, list_core_temp, &teardown_list) { - qp = container_of(list_node, struct irdma_qp, teardown_entry); - attr.qp_state = IB_QPS_ERR; - irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL); - irdma_qp_rem_ref(&qp->ibqp); - } } /** From d40def7cd05c7c08943b01e677650aaaf98dd43c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Thu, 14 Apr 2022 20:39:03 +0300 Subject: [PATCH 0785/1056] pinctrl: ralink: rename MT7628(an) functions to MT76X8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 150438c86f55989632005b92c94f4aa2ec562ed6 ] The functions that include "MT7628(an)" are for MT7628 and MT7688 SoCs. Rename them to MT76X8 to refer to both of the SoCs. Signed-off-by: Arınç ÜNAL Reviewed-by: Sergio Paracuellos Acked-by: Sergio Paracuellos Link: https://lore.kernel.org/r/20220414173916.5552-2-arinc.unal@arinc9.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/ralink/pinctrl-mt7620.c | 218 ++++++++++++------------ 1 file changed, 109 insertions(+), 109 deletions(-) diff --git a/drivers/pinctrl/ralink/pinctrl-mt7620.c b/drivers/pinctrl/ralink/pinctrl-mt7620.c index 6853b5b8b0fe7f..d3f9feec1f7460 100644 --- a/drivers/pinctrl/ralink/pinctrl-mt7620.c +++ b/drivers/pinctrl/ralink/pinctrl-mt7620.c @@ -112,260 +112,260 @@ static struct rt2880_pmx_group mt7620a_pinmux_data[] = { { 0 } }; -static struct rt2880_pmx_func pwm1_grp_mt7628[] = { +static struct rt2880_pmx_func pwm1_grp_mt76x8[] = { FUNC("sdxc d6", 3, 19, 1), FUNC("utif", 2, 19, 1), FUNC("gpio", 1, 19, 1), FUNC("pwm1", 0, 19, 1), }; -static struct rt2880_pmx_func pwm0_grp_mt7628[] = { +static struct rt2880_pmx_func pwm0_grp_mt76x8[] = { FUNC("sdxc d7", 3, 18, 1), FUNC("utif", 2, 18, 1), FUNC("gpio", 1, 18, 1), FUNC("pwm0", 0, 18, 1), }; -static struct rt2880_pmx_func uart2_grp_mt7628[] = { +static struct rt2880_pmx_func uart2_grp_mt76x8[] = { FUNC("sdxc d5 d4", 3, 20, 2), FUNC("pwm", 2, 20, 2), FUNC("gpio", 1, 20, 2), FUNC("uart2", 0, 20, 2), }; -static struct rt2880_pmx_func uart1_grp_mt7628[] = { +static struct rt2880_pmx_func uart1_grp_mt76x8[] = { FUNC("sw_r", 3, 45, 2), FUNC("pwm", 2, 45, 2), FUNC("gpio", 1, 45, 2), FUNC("uart1", 0, 45, 2), }; -static struct rt2880_pmx_func i2c_grp_mt7628[] = { +static struct rt2880_pmx_func i2c_grp_mt76x8[] = { FUNC("-", 3, 4, 2), FUNC("debug", 2, 4, 2), FUNC("gpio", 1, 4, 2), FUNC("i2c", 0, 4, 2), }; -static struct rt2880_pmx_func refclk_grp_mt7628[] = { FUNC("refclk", 0, 37, 1) }; -static struct rt2880_pmx_func perst_grp_mt7628[] = { FUNC("perst", 0, 36, 1) }; -static struct rt2880_pmx_func wdt_grp_mt7628[] = { FUNC("wdt", 0, 38, 1) }; -static struct rt2880_pmx_func spi_grp_mt7628[] = { FUNC("spi", 0, 7, 4) }; +static struct rt2880_pmx_func refclk_grp_mt76x8[] = { FUNC("refclk", 0, 37, 1) }; +static struct rt2880_pmx_func perst_grp_mt76x8[] = { FUNC("perst", 0, 36, 1) }; +static struct rt2880_pmx_func wdt_grp_mt76x8[] = { FUNC("wdt", 0, 38, 1) }; +static struct rt2880_pmx_func spi_grp_mt76x8[] = { FUNC("spi", 0, 7, 4) }; -static struct rt2880_pmx_func sd_mode_grp_mt7628[] = { +static struct rt2880_pmx_func sd_mode_grp_mt76x8[] = { FUNC("jtag", 3, 22, 8), FUNC("utif", 2, 22, 8), FUNC("gpio", 1, 22, 8), FUNC("sdxc", 0, 22, 8), }; -static struct rt2880_pmx_func uart0_grp_mt7628[] = { +static struct rt2880_pmx_func uart0_grp_mt76x8[] = { FUNC("-", 3, 12, 2), FUNC("-", 2, 12, 2), FUNC("gpio", 1, 12, 2), FUNC("uart0", 0, 12, 2), }; -static struct rt2880_pmx_func i2s_grp_mt7628[] = { +static struct rt2880_pmx_func i2s_grp_mt76x8[] = { FUNC("antenna", 3, 0, 4), FUNC("pcm", 2, 0, 4), FUNC("gpio", 1, 0, 4), FUNC("i2s", 0, 0, 4), }; -static struct rt2880_pmx_func spi_cs1_grp_mt7628[] = { +static struct rt2880_pmx_func spi_cs1_grp_mt76x8[] = { FUNC("-", 3, 6, 1), FUNC("refclk", 2, 6, 1), FUNC("gpio", 1, 6, 1), FUNC("spi cs1", 0, 6, 1), }; -static struct rt2880_pmx_func spis_grp_mt7628[] = { +static struct rt2880_pmx_func spis_grp_mt76x8[] = { FUNC("pwm_uart2", 3, 14, 4), FUNC("utif", 2, 14, 4), FUNC("gpio", 1, 14, 4), FUNC("spis", 0, 14, 4), }; -static struct rt2880_pmx_func gpio_grp_mt7628[] = { +static struct rt2880_pmx_func gpio_grp_mt76x8[] = { FUNC("pcie", 3, 11, 1), FUNC("refclk", 2, 11, 1), FUNC("gpio", 1, 11, 1), FUNC("gpio", 0, 11, 1), }; -static struct rt2880_pmx_func p4led_kn_grp_mt7628[] = { +static struct rt2880_pmx_func p4led_kn_grp_mt76x8[] = { FUNC("jtag", 3, 30, 1), FUNC("utif", 2, 30, 1), FUNC("gpio", 1, 30, 1), FUNC("p4led_kn", 0, 30, 1), }; -static struct rt2880_pmx_func p3led_kn_grp_mt7628[] = { +static struct rt2880_pmx_func p3led_kn_grp_mt76x8[] = { FUNC("jtag", 3, 31, 1), FUNC("utif", 2, 31, 1), FUNC("gpio", 1, 31, 1), FUNC("p3led_kn", 0, 31, 1), }; -static struct rt2880_pmx_func p2led_kn_grp_mt7628[] = { +static struct rt2880_pmx_func p2led_kn_grp_mt76x8[] = { FUNC("jtag", 3, 32, 1), FUNC("utif", 2, 32, 1), FUNC("gpio", 1, 32, 1), FUNC("p2led_kn", 0, 32, 1), }; -static struct rt2880_pmx_func p1led_kn_grp_mt7628[] = { +static struct rt2880_pmx_func p1led_kn_grp_mt76x8[] = { FUNC("jtag", 3, 33, 1), FUNC("utif", 2, 33, 1), FUNC("gpio", 1, 33, 1), FUNC("p1led_kn", 0, 33, 1), }; -static struct rt2880_pmx_func p0led_kn_grp_mt7628[] = { +static struct rt2880_pmx_func p0led_kn_grp_mt76x8[] = { FUNC("jtag", 3, 34, 1), FUNC("rsvd", 2, 34, 1), FUNC("gpio", 1, 34, 1), FUNC("p0led_kn", 0, 34, 1), }; -static struct rt2880_pmx_func wled_kn_grp_mt7628[] = { +static struct rt2880_pmx_func wled_kn_grp_mt76x8[] = { FUNC("rsvd", 3, 35, 1), FUNC("rsvd", 2, 35, 1), FUNC("gpio", 1, 35, 1), FUNC("wled_kn", 0, 35, 1), }; -static struct rt2880_pmx_func p4led_an_grp_mt7628[] = { +static struct rt2880_pmx_func p4led_an_grp_mt76x8[] = { FUNC("jtag", 3, 39, 1), FUNC("utif", 2, 39, 1), FUNC("gpio", 1, 39, 1), FUNC("p4led_an", 0, 39, 1), }; -static struct rt2880_pmx_func p3led_an_grp_mt7628[] = { +static struct rt2880_pmx_func p3led_an_grp_mt76x8[] = { FUNC("jtag", 3, 40, 1), FUNC("utif", 2, 40, 1), FUNC("gpio", 1, 40, 1), FUNC("p3led_an", 0, 40, 1), }; -static struct rt2880_pmx_func p2led_an_grp_mt7628[] = { +static struct rt2880_pmx_func p2led_an_grp_mt76x8[] = { FUNC("jtag", 3, 41, 1), FUNC("utif", 2, 41, 1), FUNC("gpio", 1, 41, 1), FUNC("p2led_an", 0, 41, 1), }; -static struct rt2880_pmx_func p1led_an_grp_mt7628[] = { +static struct rt2880_pmx_func p1led_an_grp_mt76x8[] = { FUNC("jtag", 3, 42, 1), FUNC("utif", 2, 42, 1), FUNC("gpio", 1, 42, 1), FUNC("p1led_an", 0, 42, 1), }; -static struct rt2880_pmx_func p0led_an_grp_mt7628[] = { +static struct rt2880_pmx_func p0led_an_grp_mt76x8[] = { FUNC("jtag", 3, 43, 1), FUNC("rsvd", 2, 43, 1), FUNC("gpio", 1, 43, 1), FUNC("p0led_an", 0, 43, 1), }; -static struct rt2880_pmx_func wled_an_grp_mt7628[] = { +static struct rt2880_pmx_func wled_an_grp_mt76x8[] = { FUNC("rsvd", 3, 44, 1), FUNC("rsvd", 2, 44, 1), FUNC("gpio", 1, 44, 1), FUNC("wled_an", 0, 44, 1), }; -#define MT7628_GPIO_MODE_MASK 0x3 - -#define MT7628_GPIO_MODE_P4LED_KN 58 -#define MT7628_GPIO_MODE_P3LED_KN 56 -#define MT7628_GPIO_MODE_P2LED_KN 54 -#define MT7628_GPIO_MODE_P1LED_KN 52 -#define MT7628_GPIO_MODE_P0LED_KN 50 -#define MT7628_GPIO_MODE_WLED_KN 48 -#define MT7628_GPIO_MODE_P4LED_AN 42 -#define MT7628_GPIO_MODE_P3LED_AN 40 -#define MT7628_GPIO_MODE_P2LED_AN 38 -#define MT7628_GPIO_MODE_P1LED_AN 36 -#define MT7628_GPIO_MODE_P0LED_AN 34 -#define MT7628_GPIO_MODE_WLED_AN 32 -#define MT7628_GPIO_MODE_PWM1 30 -#define MT7628_GPIO_MODE_PWM0 28 -#define MT7628_GPIO_MODE_UART2 26 -#define MT7628_GPIO_MODE_UART1 24 -#define MT7628_GPIO_MODE_I2C 20 -#define MT7628_GPIO_MODE_REFCLK 18 -#define MT7628_GPIO_MODE_PERST 16 -#define MT7628_GPIO_MODE_WDT 14 -#define MT7628_GPIO_MODE_SPI 12 -#define MT7628_GPIO_MODE_SDMODE 10 -#define MT7628_GPIO_MODE_UART0 8 -#define MT7628_GPIO_MODE_I2S 6 -#define MT7628_GPIO_MODE_CS1 4 -#define MT7628_GPIO_MODE_SPIS 2 -#define MT7628_GPIO_MODE_GPIO 0 - -static struct rt2880_pmx_group mt7628an_pinmux_data[] = { - GRP_G("pwm1", pwm1_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_PWM1), - GRP_G("pwm0", pwm0_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_PWM0), - GRP_G("uart2", uart2_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_UART2), - GRP_G("uart1", uart1_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_UART1), - GRP_G("i2c", i2c_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_I2C), - GRP("refclk", refclk_grp_mt7628, 1, MT7628_GPIO_MODE_REFCLK), - GRP("perst", perst_grp_mt7628, 1, MT7628_GPIO_MODE_PERST), - GRP("wdt", wdt_grp_mt7628, 1, MT7628_GPIO_MODE_WDT), - GRP("spi", spi_grp_mt7628, 1, MT7628_GPIO_MODE_SPI), - GRP_G("sdmode", sd_mode_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_SDMODE), - GRP_G("uart0", uart0_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_UART0), - GRP_G("i2s", i2s_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_I2S), - GRP_G("spi cs1", spi_cs1_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_CS1), - GRP_G("spis", spis_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_SPIS), - GRP_G("gpio", gpio_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_GPIO), - GRP_G("wled_an", wled_an_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_WLED_AN), - GRP_G("p0led_an", p0led_an_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_P0LED_AN), - GRP_G("p1led_an", p1led_an_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_P1LED_AN), - GRP_G("p2led_an", p2led_an_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_P2LED_AN), - GRP_G("p3led_an", p3led_an_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_P3LED_AN), - GRP_G("p4led_an", p4led_an_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_P4LED_AN), - GRP_G("wled_kn", wled_kn_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_WLED_KN), - GRP_G("p0led_kn", p0led_kn_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_P0LED_KN), - GRP_G("p1led_kn", p1led_kn_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_P1LED_KN), - GRP_G("p2led_kn", p2led_kn_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_P2LED_KN), - GRP_G("p3led_kn", p3led_kn_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_P3LED_KN), - GRP_G("p4led_kn", p4led_kn_grp_mt7628, MT7628_GPIO_MODE_MASK, - 1, MT7628_GPIO_MODE_P4LED_KN), +#define MT76X8_GPIO_MODE_MASK 0x3 + +#define MT76X8_GPIO_MODE_P4LED_KN 58 +#define MT76X8_GPIO_MODE_P3LED_KN 56 +#define MT76X8_GPIO_MODE_P2LED_KN 54 +#define MT76X8_GPIO_MODE_P1LED_KN 52 +#define MT76X8_GPIO_MODE_P0LED_KN 50 +#define MT76X8_GPIO_MODE_WLED_KN 48 +#define MT76X8_GPIO_MODE_P4LED_AN 42 +#define MT76X8_GPIO_MODE_P3LED_AN 40 +#define MT76X8_GPIO_MODE_P2LED_AN 38 +#define MT76X8_GPIO_MODE_P1LED_AN 36 +#define MT76X8_GPIO_MODE_P0LED_AN 34 +#define MT76X8_GPIO_MODE_WLED_AN 32 +#define MT76X8_GPIO_MODE_PWM1 30 +#define MT76X8_GPIO_MODE_PWM0 28 +#define MT76X8_GPIO_MODE_UART2 26 +#define MT76X8_GPIO_MODE_UART1 24 +#define MT76X8_GPIO_MODE_I2C 20 +#define MT76X8_GPIO_MODE_REFCLK 18 +#define MT76X8_GPIO_MODE_PERST 16 +#define MT76X8_GPIO_MODE_WDT 14 +#define MT76X8_GPIO_MODE_SPI 12 +#define MT76X8_GPIO_MODE_SDMODE 10 +#define MT76X8_GPIO_MODE_UART0 8 +#define MT76X8_GPIO_MODE_I2S 6 +#define MT76X8_GPIO_MODE_CS1 4 +#define MT76X8_GPIO_MODE_SPIS 2 +#define MT76X8_GPIO_MODE_GPIO 0 + +static struct rt2880_pmx_group mt76x8_pinmux_data[] = { + GRP_G("pwm1", pwm1_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_PWM1), + GRP_G("pwm0", pwm0_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_PWM0), + GRP_G("uart2", uart2_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_UART2), + GRP_G("uart1", uart1_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_UART1), + GRP_G("i2c", i2c_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_I2C), + GRP("refclk", refclk_grp_mt76x8, 1, MT76X8_GPIO_MODE_REFCLK), + GRP("perst", perst_grp_mt76x8, 1, MT76X8_GPIO_MODE_PERST), + GRP("wdt", wdt_grp_mt76x8, 1, MT76X8_GPIO_MODE_WDT), + GRP("spi", spi_grp_mt76x8, 1, MT76X8_GPIO_MODE_SPI), + GRP_G("sdmode", sd_mode_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_SDMODE), + GRP_G("uart0", uart0_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_UART0), + GRP_G("i2s", i2s_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_I2S), + GRP_G("spi cs1", spi_cs1_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_CS1), + GRP_G("spis", spis_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_SPIS), + GRP_G("gpio", gpio_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_GPIO), + GRP_G("wled_an", wled_an_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_WLED_AN), + GRP_G("p0led_an", p0led_an_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_P0LED_AN), + GRP_G("p1led_an", p1led_an_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_P1LED_AN), + GRP_G("p2led_an", p2led_an_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_P2LED_AN), + GRP_G("p3led_an", p3led_an_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_P3LED_AN), + GRP_G("p4led_an", p4led_an_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_P4LED_AN), + GRP_G("wled_kn", wled_kn_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_WLED_KN), + GRP_G("p0led_kn", p0led_kn_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_P0LED_KN), + GRP_G("p1led_kn", p1led_kn_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_P1LED_KN), + GRP_G("p2led_kn", p2led_kn_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_P2LED_KN), + GRP_G("p3led_kn", p3led_kn_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_P3LED_KN), + GRP_G("p4led_kn", p4led_kn_grp_mt76x8, MT76X8_GPIO_MODE_MASK, + 1, MT76X8_GPIO_MODE_P4LED_KN), { 0 } }; static int mt7620_pinmux_probe(struct platform_device *pdev) { if (is_mt76x8()) - return rt2880_pinmux_init(pdev, mt7628an_pinmux_data); + return rt2880_pinmux_init(pdev, mt76x8_pinmux_data); else return rt2880_pinmux_init(pdev, mt7620a_pinmux_data); } From e4e3187750f8da387774c2654a223f858391033b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Thu, 14 Apr 2022 20:39:04 +0300 Subject: [PATCH 0786/1056] pinctrl: ralink: rename pinctrl-rt2880 to pinctrl-ralink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6b3dd85b0bdec1a8308fa5dcbafcd5d55b5f3608 ] pinctrl-rt2880.c and pinmux.h make up the Ralink pinctrl driver. Rename pinctrl-rt2880.c to pinctrl-ralink.c. Rename pinmux.h to pinctrl-ralink.h. Fix references to it. Rename functions that include "rt2880" to "ralink". Remove PINCTRL_RT2880 symbol and make the existing PINCTRL_RALINK symbol compile pinctrl-ralink.c. Change the bool to "Ralink pinctrl driver". Signed-off-by: Arınç ÜNAL Reviewed-by: Sergio Paracuellos Acked-by: Sergio Paracuellos Link: https://lore.kernel.org/r/20220414173916.5552-3-arinc.unal@arinc9.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/ralink/Kconfig | 16 ++-- drivers/pinctrl/ralink/Makefile | 2 +- drivers/pinctrl/ralink/pinctrl-mt7620.c | 92 +++++++++---------- drivers/pinctrl/ralink/pinctrl-mt7621.c | 30 +++--- .../{pinctrl-rt2880.c => pinctrl-ralink.c} | 90 +++++++++--------- .../ralink/{pinmux.h => pinctrl-ralink.h} | 16 ++-- drivers/pinctrl/ralink/pinctrl-rt288x.c | 20 ++-- drivers/pinctrl/ralink/pinctrl-rt305x.c | 44 ++++----- drivers/pinctrl/ralink/pinctrl-rt3883.c | 28 +++--- 9 files changed, 167 insertions(+), 171 deletions(-) rename drivers/pinctrl/ralink/{pinctrl-rt2880.c => pinctrl-ralink.c} (73%) rename drivers/pinctrl/ralink/{pinmux.h => pinctrl-ralink.h} (75%) diff --git a/drivers/pinctrl/ralink/Kconfig b/drivers/pinctrl/ralink/Kconfig index a76ee3deb8c31d..d0f0a8f2b9b7de 100644 --- a/drivers/pinctrl/ralink/Kconfig +++ b/drivers/pinctrl/ralink/Kconfig @@ -3,37 +3,33 @@ menu "Ralink pinctrl drivers" depends on RALINK config PINCTRL_RALINK - bool "Ralink pin control support" - default y if RALINK - -config PINCTRL_RT2880 - bool "RT2880 pinctrl driver for RALINK/Mediatek SOCs" + bool "Ralink pinctrl driver" select PINMUX select GENERIC_PINCONF config PINCTRL_MT7620 bool "mt7620 pinctrl driver for RALINK/Mediatek SOCs" depends on RALINK && SOC_MT7620 - select PINCTRL_RT2880 + select PINCTRL_RALINK config PINCTRL_MT7621 bool "mt7621 pinctrl driver for RALINK/Mediatek SOCs" depends on RALINK && SOC_MT7621 - select PINCTRL_RT2880 + select PINCTRL_RALINK config PINCTRL_RT288X bool "RT288X pinctrl driver for RALINK/Mediatek SOCs" depends on RALINK && SOC_RT288X - select PINCTRL_RT2880 + select PINCTRL_RALINK config PINCTRL_RT305X bool "RT305X pinctrl driver for RALINK/Mediatek SOCs" depends on RALINK && SOC_RT305X - select PINCTRL_RT2880 + select PINCTRL_RALINK config PINCTRL_RT3883 bool "RT3883 pinctrl driver for RALINK/Mediatek SOCs" depends on RALINK && SOC_RT3883 - select PINCTRL_RT2880 + select PINCTRL_RALINK endmenu diff --git a/drivers/pinctrl/ralink/Makefile b/drivers/pinctrl/ralink/Makefile index a15610206ced45..2c1323b74e96f6 100644 --- a/drivers/pinctrl/ralink/Makefile +++ b/drivers/pinctrl/ralink/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_PINCTRL_RT2880) += pinctrl-rt2880.o +obj-$(CONFIG_PINCTRL_RALINK) += pinctrl-ralink.o obj-$(CONFIG_PINCTRL_MT7620) += pinctrl-mt7620.o obj-$(CONFIG_PINCTRL_MT7621) += pinctrl-mt7621.o diff --git a/drivers/pinctrl/ralink/pinctrl-mt7620.c b/drivers/pinctrl/ralink/pinctrl-mt7620.c index d3f9feec1f7460..51b863d85c51ed 100644 --- a/drivers/pinctrl/ralink/pinctrl-mt7620.c +++ b/drivers/pinctrl/ralink/pinctrl-mt7620.c @@ -5,7 +5,7 @@ #include #include #include -#include "pinmux.h" +#include "pinctrl-ralink.h" #define MT7620_GPIO_MODE_UART0_SHIFT 2 #define MT7620_GPIO_MODE_UART0_MASK 0x7 @@ -54,20 +54,20 @@ #define MT7620_GPIO_MODE_EPHY 15 #define MT7620_GPIO_MODE_PA 20 -static struct rt2880_pmx_func i2c_grp[] = { FUNC("i2c", 0, 1, 2) }; -static struct rt2880_pmx_func spi_grp[] = { FUNC("spi", 0, 3, 4) }; -static struct rt2880_pmx_func uartlite_grp[] = { FUNC("uartlite", 0, 15, 2) }; -static struct rt2880_pmx_func mdio_grp[] = { +static struct ralink_pmx_func i2c_grp[] = { FUNC("i2c", 0, 1, 2) }; +static struct ralink_pmx_func spi_grp[] = { FUNC("spi", 0, 3, 4) }; +static struct ralink_pmx_func uartlite_grp[] = { FUNC("uartlite", 0, 15, 2) }; +static struct ralink_pmx_func mdio_grp[] = { FUNC("mdio", MT7620_GPIO_MODE_MDIO, 22, 2), FUNC("refclk", MT7620_GPIO_MODE_MDIO_REFCLK, 22, 2), }; -static struct rt2880_pmx_func rgmii1_grp[] = { FUNC("rgmii1", 0, 24, 12) }; -static struct rt2880_pmx_func refclk_grp[] = { FUNC("spi refclk", 0, 37, 3) }; -static struct rt2880_pmx_func ephy_grp[] = { FUNC("ephy", 0, 40, 5) }; -static struct rt2880_pmx_func rgmii2_grp[] = { FUNC("rgmii2", 0, 60, 12) }; -static struct rt2880_pmx_func wled_grp[] = { FUNC("wled", 0, 72, 1) }; -static struct rt2880_pmx_func pa_grp[] = { FUNC("pa", 0, 18, 4) }; -static struct rt2880_pmx_func uartf_grp[] = { +static struct ralink_pmx_func rgmii1_grp[] = { FUNC("rgmii1", 0, 24, 12) }; +static struct ralink_pmx_func refclk_grp[] = { FUNC("spi refclk", 0, 37, 3) }; +static struct ralink_pmx_func ephy_grp[] = { FUNC("ephy", 0, 40, 5) }; +static struct ralink_pmx_func rgmii2_grp[] = { FUNC("rgmii2", 0, 60, 12) }; +static struct ralink_pmx_func wled_grp[] = { FUNC("wled", 0, 72, 1) }; +static struct ralink_pmx_func pa_grp[] = { FUNC("pa", 0, 18, 4) }; +static struct ralink_pmx_func uartf_grp[] = { FUNC("uartf", MT7620_GPIO_MODE_UARTF, 7, 8), FUNC("pcm uartf", MT7620_GPIO_MODE_PCM_UARTF, 7, 8), FUNC("pcm i2s", MT7620_GPIO_MODE_PCM_I2S, 7, 8), @@ -76,20 +76,20 @@ static struct rt2880_pmx_func uartf_grp[] = { FUNC("gpio uartf", MT7620_GPIO_MODE_GPIO_UARTF, 7, 4), FUNC("gpio i2s", MT7620_GPIO_MODE_GPIO_I2S, 7, 4), }; -static struct rt2880_pmx_func wdt_grp[] = { +static struct ralink_pmx_func wdt_grp[] = { FUNC("wdt rst", 0, 17, 1), FUNC("wdt refclk", 0, 17, 1), }; -static struct rt2880_pmx_func pcie_rst_grp[] = { +static struct ralink_pmx_func pcie_rst_grp[] = { FUNC("pcie rst", MT7620_GPIO_MODE_PCIE_RST, 36, 1), FUNC("pcie refclk", MT7620_GPIO_MODE_PCIE_REF, 36, 1) }; -static struct rt2880_pmx_func nd_sd_grp[] = { +static struct ralink_pmx_func nd_sd_grp[] = { FUNC("nand", MT7620_GPIO_MODE_NAND, 45, 15), FUNC("sd", MT7620_GPIO_MODE_SD, 47, 13) }; -static struct rt2880_pmx_group mt7620a_pinmux_data[] = { +static struct ralink_pmx_group mt7620a_pinmux_data[] = { GRP("i2c", i2c_grp, 1, MT7620_GPIO_MODE_I2C), GRP("uartf", uartf_grp, MT7620_GPIO_MODE_UART0_MASK, MT7620_GPIO_MODE_UART0_SHIFT), @@ -112,166 +112,166 @@ static struct rt2880_pmx_group mt7620a_pinmux_data[] = { { 0 } }; -static struct rt2880_pmx_func pwm1_grp_mt76x8[] = { +static struct ralink_pmx_func pwm1_grp_mt76x8[] = { FUNC("sdxc d6", 3, 19, 1), FUNC("utif", 2, 19, 1), FUNC("gpio", 1, 19, 1), FUNC("pwm1", 0, 19, 1), }; -static struct rt2880_pmx_func pwm0_grp_mt76x8[] = { +static struct ralink_pmx_func pwm0_grp_mt76x8[] = { FUNC("sdxc d7", 3, 18, 1), FUNC("utif", 2, 18, 1), FUNC("gpio", 1, 18, 1), FUNC("pwm0", 0, 18, 1), }; -static struct rt2880_pmx_func uart2_grp_mt76x8[] = { +static struct ralink_pmx_func uart2_grp_mt76x8[] = { FUNC("sdxc d5 d4", 3, 20, 2), FUNC("pwm", 2, 20, 2), FUNC("gpio", 1, 20, 2), FUNC("uart2", 0, 20, 2), }; -static struct rt2880_pmx_func uart1_grp_mt76x8[] = { +static struct ralink_pmx_func uart1_grp_mt76x8[] = { FUNC("sw_r", 3, 45, 2), FUNC("pwm", 2, 45, 2), FUNC("gpio", 1, 45, 2), FUNC("uart1", 0, 45, 2), }; -static struct rt2880_pmx_func i2c_grp_mt76x8[] = { +static struct ralink_pmx_func i2c_grp_mt76x8[] = { FUNC("-", 3, 4, 2), FUNC("debug", 2, 4, 2), FUNC("gpio", 1, 4, 2), FUNC("i2c", 0, 4, 2), }; -static struct rt2880_pmx_func refclk_grp_mt76x8[] = { FUNC("refclk", 0, 37, 1) }; -static struct rt2880_pmx_func perst_grp_mt76x8[] = { FUNC("perst", 0, 36, 1) }; -static struct rt2880_pmx_func wdt_grp_mt76x8[] = { FUNC("wdt", 0, 38, 1) }; -static struct rt2880_pmx_func spi_grp_mt76x8[] = { FUNC("spi", 0, 7, 4) }; +static struct ralink_pmx_func refclk_grp_mt76x8[] = { FUNC("refclk", 0, 37, 1) }; +static struct ralink_pmx_func perst_grp_mt76x8[] = { FUNC("perst", 0, 36, 1) }; +static struct ralink_pmx_func wdt_grp_mt76x8[] = { FUNC("wdt", 0, 38, 1) }; +static struct ralink_pmx_func spi_grp_mt76x8[] = { FUNC("spi", 0, 7, 4) }; -static struct rt2880_pmx_func sd_mode_grp_mt76x8[] = { +static struct ralink_pmx_func sd_mode_grp_mt76x8[] = { FUNC("jtag", 3, 22, 8), FUNC("utif", 2, 22, 8), FUNC("gpio", 1, 22, 8), FUNC("sdxc", 0, 22, 8), }; -static struct rt2880_pmx_func uart0_grp_mt76x8[] = { +static struct ralink_pmx_func uart0_grp_mt76x8[] = { FUNC("-", 3, 12, 2), FUNC("-", 2, 12, 2), FUNC("gpio", 1, 12, 2), FUNC("uart0", 0, 12, 2), }; -static struct rt2880_pmx_func i2s_grp_mt76x8[] = { +static struct ralink_pmx_func i2s_grp_mt76x8[] = { FUNC("antenna", 3, 0, 4), FUNC("pcm", 2, 0, 4), FUNC("gpio", 1, 0, 4), FUNC("i2s", 0, 0, 4), }; -static struct rt2880_pmx_func spi_cs1_grp_mt76x8[] = { +static struct ralink_pmx_func spi_cs1_grp_mt76x8[] = { FUNC("-", 3, 6, 1), FUNC("refclk", 2, 6, 1), FUNC("gpio", 1, 6, 1), FUNC("spi cs1", 0, 6, 1), }; -static struct rt2880_pmx_func spis_grp_mt76x8[] = { +static struct ralink_pmx_func spis_grp_mt76x8[] = { FUNC("pwm_uart2", 3, 14, 4), FUNC("utif", 2, 14, 4), FUNC("gpio", 1, 14, 4), FUNC("spis", 0, 14, 4), }; -static struct rt2880_pmx_func gpio_grp_mt76x8[] = { +static struct ralink_pmx_func gpio_grp_mt76x8[] = { FUNC("pcie", 3, 11, 1), FUNC("refclk", 2, 11, 1), FUNC("gpio", 1, 11, 1), FUNC("gpio", 0, 11, 1), }; -static struct rt2880_pmx_func p4led_kn_grp_mt76x8[] = { +static struct ralink_pmx_func p4led_kn_grp_mt76x8[] = { FUNC("jtag", 3, 30, 1), FUNC("utif", 2, 30, 1), FUNC("gpio", 1, 30, 1), FUNC("p4led_kn", 0, 30, 1), }; -static struct rt2880_pmx_func p3led_kn_grp_mt76x8[] = { +static struct ralink_pmx_func p3led_kn_grp_mt76x8[] = { FUNC("jtag", 3, 31, 1), FUNC("utif", 2, 31, 1), FUNC("gpio", 1, 31, 1), FUNC("p3led_kn", 0, 31, 1), }; -static struct rt2880_pmx_func p2led_kn_grp_mt76x8[] = { +static struct ralink_pmx_func p2led_kn_grp_mt76x8[] = { FUNC("jtag", 3, 32, 1), FUNC("utif", 2, 32, 1), FUNC("gpio", 1, 32, 1), FUNC("p2led_kn", 0, 32, 1), }; -static struct rt2880_pmx_func p1led_kn_grp_mt76x8[] = { +static struct ralink_pmx_func p1led_kn_grp_mt76x8[] = { FUNC("jtag", 3, 33, 1), FUNC("utif", 2, 33, 1), FUNC("gpio", 1, 33, 1), FUNC("p1led_kn", 0, 33, 1), }; -static struct rt2880_pmx_func p0led_kn_grp_mt76x8[] = { +static struct ralink_pmx_func p0led_kn_grp_mt76x8[] = { FUNC("jtag", 3, 34, 1), FUNC("rsvd", 2, 34, 1), FUNC("gpio", 1, 34, 1), FUNC("p0led_kn", 0, 34, 1), }; -static struct rt2880_pmx_func wled_kn_grp_mt76x8[] = { +static struct ralink_pmx_func wled_kn_grp_mt76x8[] = { FUNC("rsvd", 3, 35, 1), FUNC("rsvd", 2, 35, 1), FUNC("gpio", 1, 35, 1), FUNC("wled_kn", 0, 35, 1), }; -static struct rt2880_pmx_func p4led_an_grp_mt76x8[] = { +static struct ralink_pmx_func p4led_an_grp_mt76x8[] = { FUNC("jtag", 3, 39, 1), FUNC("utif", 2, 39, 1), FUNC("gpio", 1, 39, 1), FUNC("p4led_an", 0, 39, 1), }; -static struct rt2880_pmx_func p3led_an_grp_mt76x8[] = { +static struct ralink_pmx_func p3led_an_grp_mt76x8[] = { FUNC("jtag", 3, 40, 1), FUNC("utif", 2, 40, 1), FUNC("gpio", 1, 40, 1), FUNC("p3led_an", 0, 40, 1), }; -static struct rt2880_pmx_func p2led_an_grp_mt76x8[] = { +static struct ralink_pmx_func p2led_an_grp_mt76x8[] = { FUNC("jtag", 3, 41, 1), FUNC("utif", 2, 41, 1), FUNC("gpio", 1, 41, 1), FUNC("p2led_an", 0, 41, 1), }; -static struct rt2880_pmx_func p1led_an_grp_mt76x8[] = { +static struct ralink_pmx_func p1led_an_grp_mt76x8[] = { FUNC("jtag", 3, 42, 1), FUNC("utif", 2, 42, 1), FUNC("gpio", 1, 42, 1), FUNC("p1led_an", 0, 42, 1), }; -static struct rt2880_pmx_func p0led_an_grp_mt76x8[] = { +static struct ralink_pmx_func p0led_an_grp_mt76x8[] = { FUNC("jtag", 3, 43, 1), FUNC("rsvd", 2, 43, 1), FUNC("gpio", 1, 43, 1), FUNC("p0led_an", 0, 43, 1), }; -static struct rt2880_pmx_func wled_an_grp_mt76x8[] = { +static struct ralink_pmx_func wled_an_grp_mt76x8[] = { FUNC("rsvd", 3, 44, 1), FUNC("rsvd", 2, 44, 1), FUNC("gpio", 1, 44, 1), @@ -308,7 +308,7 @@ static struct rt2880_pmx_func wled_an_grp_mt76x8[] = { #define MT76X8_GPIO_MODE_SPIS 2 #define MT76X8_GPIO_MODE_GPIO 0 -static struct rt2880_pmx_group mt76x8_pinmux_data[] = { +static struct ralink_pmx_group mt76x8_pinmux_data[] = { GRP_G("pwm1", pwm1_grp_mt76x8, MT76X8_GPIO_MODE_MASK, 1, MT76X8_GPIO_MODE_PWM1), GRP_G("pwm0", pwm0_grp_mt76x8, MT76X8_GPIO_MODE_MASK, @@ -365,9 +365,9 @@ static struct rt2880_pmx_group mt76x8_pinmux_data[] = { static int mt7620_pinmux_probe(struct platform_device *pdev) { if (is_mt76x8()) - return rt2880_pinmux_init(pdev, mt76x8_pinmux_data); + return ralink_pinmux_init(pdev, mt76x8_pinmux_data); else - return rt2880_pinmux_init(pdev, mt7620a_pinmux_data); + return ralink_pinmux_init(pdev, mt7620a_pinmux_data); } static const struct of_device_id mt7620_pinmux_match[] = { diff --git a/drivers/pinctrl/ralink/pinctrl-mt7621.c b/drivers/pinctrl/ralink/pinctrl-mt7621.c index 7d96144c474e7c..14b89cb43d4cbf 100644 --- a/drivers/pinctrl/ralink/pinctrl-mt7621.c +++ b/drivers/pinctrl/ralink/pinctrl-mt7621.c @@ -3,7 +3,7 @@ #include #include #include -#include "pinmux.h" +#include "pinctrl-ralink.h" #define MT7621_GPIO_MODE_UART1 1 #define MT7621_GPIO_MODE_I2C 2 @@ -34,40 +34,40 @@ #define MT7621_GPIO_MODE_SDHCI_SHIFT 18 #define MT7621_GPIO_MODE_SDHCI_GPIO 1 -static struct rt2880_pmx_func uart1_grp[] = { FUNC("uart1", 0, 1, 2) }; -static struct rt2880_pmx_func i2c_grp[] = { FUNC("i2c", 0, 3, 2) }; -static struct rt2880_pmx_func uart3_grp[] = { +static struct ralink_pmx_func uart1_grp[] = { FUNC("uart1", 0, 1, 2) }; +static struct ralink_pmx_func i2c_grp[] = { FUNC("i2c", 0, 3, 2) }; +static struct ralink_pmx_func uart3_grp[] = { FUNC("uart3", 0, 5, 4), FUNC("i2s", 2, 5, 4), FUNC("spdif3", 3, 5, 4), }; -static struct rt2880_pmx_func uart2_grp[] = { +static struct ralink_pmx_func uart2_grp[] = { FUNC("uart2", 0, 9, 4), FUNC("pcm", 2, 9, 4), FUNC("spdif2", 3, 9, 4), }; -static struct rt2880_pmx_func jtag_grp[] = { FUNC("jtag", 0, 13, 5) }; -static struct rt2880_pmx_func wdt_grp[] = { +static struct ralink_pmx_func jtag_grp[] = { FUNC("jtag", 0, 13, 5) }; +static struct ralink_pmx_func wdt_grp[] = { FUNC("wdt rst", 0, 18, 1), FUNC("wdt refclk", 2, 18, 1), }; -static struct rt2880_pmx_func pcie_rst_grp[] = { +static struct ralink_pmx_func pcie_rst_grp[] = { FUNC("pcie rst", MT7621_GPIO_MODE_PCIE_RST, 19, 1), FUNC("pcie refclk", MT7621_GPIO_MODE_PCIE_REF, 19, 1) }; -static struct rt2880_pmx_func mdio_grp[] = { FUNC("mdio", 0, 20, 2) }; -static struct rt2880_pmx_func rgmii2_grp[] = { FUNC("rgmii2", 0, 22, 12) }; -static struct rt2880_pmx_func spi_grp[] = { +static struct ralink_pmx_func mdio_grp[] = { FUNC("mdio", 0, 20, 2) }; +static struct ralink_pmx_func rgmii2_grp[] = { FUNC("rgmii2", 0, 22, 12) }; +static struct ralink_pmx_func spi_grp[] = { FUNC("spi", 0, 34, 7), FUNC("nand1", 2, 34, 7), }; -static struct rt2880_pmx_func sdhci_grp[] = { +static struct ralink_pmx_func sdhci_grp[] = { FUNC("sdhci", 0, 41, 8), FUNC("nand2", 2, 41, 8), }; -static struct rt2880_pmx_func rgmii1_grp[] = { FUNC("rgmii1", 0, 49, 12) }; +static struct ralink_pmx_func rgmii1_grp[] = { FUNC("rgmii1", 0, 49, 12) }; -static struct rt2880_pmx_group mt7621_pinmux_data[] = { +static struct ralink_pmx_group mt7621_pinmux_data[] = { GRP("uart1", uart1_grp, 1, MT7621_GPIO_MODE_UART1), GRP("i2c", i2c_grp, 1, MT7621_GPIO_MODE_I2C), GRP_G("uart3", uart3_grp, MT7621_GPIO_MODE_UART3_MASK, @@ -92,7 +92,7 @@ static struct rt2880_pmx_group mt7621_pinmux_data[] = { static int mt7621_pinmux_probe(struct platform_device *pdev) { - return rt2880_pinmux_init(pdev, mt7621_pinmux_data); + return ralink_pinmux_init(pdev, mt7621_pinmux_data); } static const struct of_device_id mt7621_pinmux_match[] = { diff --git a/drivers/pinctrl/ralink/pinctrl-rt2880.c b/drivers/pinctrl/ralink/pinctrl-ralink.c similarity index 73% rename from drivers/pinctrl/ralink/pinctrl-rt2880.c rename to drivers/pinctrl/ralink/pinctrl-ralink.c index 96fc06d1b8b92d..841f23f55c9508 100644 --- a/drivers/pinctrl/ralink/pinctrl-rt2880.c +++ b/drivers/pinctrl/ralink/pinctrl-ralink.c @@ -19,23 +19,23 @@ #include #include -#include "pinmux.h" +#include "pinctrl-ralink.h" #include "../core.h" #include "../pinctrl-utils.h" #define SYSC_REG_GPIO_MODE 0x60 #define SYSC_REG_GPIO_MODE2 0x64 -struct rt2880_priv { +struct ralink_priv { struct device *dev; struct pinctrl_pin_desc *pads; struct pinctrl_desc *desc; - struct rt2880_pmx_func **func; + struct ralink_pmx_func **func; int func_count; - struct rt2880_pmx_group *groups; + struct ralink_pmx_group *groups; const char **group_names; int group_count; @@ -43,27 +43,27 @@ struct rt2880_priv { int max_pins; }; -static int rt2880_get_group_count(struct pinctrl_dev *pctrldev) +static int ralink_get_group_count(struct pinctrl_dev *pctrldev) { - struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev); + struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev); return p->group_count; } -static const char *rt2880_get_group_name(struct pinctrl_dev *pctrldev, +static const char *ralink_get_group_name(struct pinctrl_dev *pctrldev, unsigned int group) { - struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev); + struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev); return (group >= p->group_count) ? NULL : p->group_names[group]; } -static int rt2880_get_group_pins(struct pinctrl_dev *pctrldev, +static int ralink_get_group_pins(struct pinctrl_dev *pctrldev, unsigned int group, const unsigned int **pins, unsigned int *num_pins) { - struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev); + struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev); if (group >= p->group_count) return -EINVAL; @@ -74,35 +74,35 @@ static int rt2880_get_group_pins(struct pinctrl_dev *pctrldev, return 0; } -static const struct pinctrl_ops rt2880_pctrl_ops = { - .get_groups_count = rt2880_get_group_count, - .get_group_name = rt2880_get_group_name, - .get_group_pins = rt2880_get_group_pins, +static const struct pinctrl_ops ralink_pctrl_ops = { + .get_groups_count = ralink_get_group_count, + .get_group_name = ralink_get_group_name, + .get_group_pins = ralink_get_group_pins, .dt_node_to_map = pinconf_generic_dt_node_to_map_all, .dt_free_map = pinconf_generic_dt_free_map, }; -static int rt2880_pmx_func_count(struct pinctrl_dev *pctrldev) +static int ralink_pmx_func_count(struct pinctrl_dev *pctrldev) { - struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev); + struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev); return p->func_count; } -static const char *rt2880_pmx_func_name(struct pinctrl_dev *pctrldev, +static const char *ralink_pmx_func_name(struct pinctrl_dev *pctrldev, unsigned int func) { - struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev); + struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev); return p->func[func]->name; } -static int rt2880_pmx_group_get_groups(struct pinctrl_dev *pctrldev, +static int ralink_pmx_group_get_groups(struct pinctrl_dev *pctrldev, unsigned int func, const char * const **groups, unsigned int * const num_groups) { - struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev); + struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev); if (p->func[func]->group_count == 1) *groups = &p->group_names[p->func[func]->groups[0]]; @@ -114,10 +114,10 @@ static int rt2880_pmx_group_get_groups(struct pinctrl_dev *pctrldev, return 0; } -static int rt2880_pmx_group_enable(struct pinctrl_dev *pctrldev, +static int ralink_pmx_group_enable(struct pinctrl_dev *pctrldev, unsigned int func, unsigned int group) { - struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev); + struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev); u32 mode = 0; u32 reg = SYSC_REG_GPIO_MODE; int i; @@ -158,11 +158,11 @@ static int rt2880_pmx_group_enable(struct pinctrl_dev *pctrldev, return 0; } -static int rt2880_pmx_group_gpio_request_enable(struct pinctrl_dev *pctrldev, +static int ralink_pmx_group_gpio_request_enable(struct pinctrl_dev *pctrldev, struct pinctrl_gpio_range *range, unsigned int pin) { - struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev); + struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev); if (!p->gpio[pin]) { dev_err(p->dev, "pin %d is not set to gpio mux\n", pin); @@ -172,28 +172,28 @@ static int rt2880_pmx_group_gpio_request_enable(struct pinctrl_dev *pctrldev, return 0; } -static const struct pinmux_ops rt2880_pmx_group_ops = { - .get_functions_count = rt2880_pmx_func_count, - .get_function_name = rt2880_pmx_func_name, - .get_function_groups = rt2880_pmx_group_get_groups, - .set_mux = rt2880_pmx_group_enable, - .gpio_request_enable = rt2880_pmx_group_gpio_request_enable, +static const struct pinmux_ops ralink_pmx_group_ops = { + .get_functions_count = ralink_pmx_func_count, + .get_function_name = ralink_pmx_func_name, + .get_function_groups = ralink_pmx_group_get_groups, + .set_mux = ralink_pmx_group_enable, + .gpio_request_enable = ralink_pmx_group_gpio_request_enable, }; -static struct pinctrl_desc rt2880_pctrl_desc = { +static struct pinctrl_desc ralink_pctrl_desc = { .owner = THIS_MODULE, - .name = "rt2880-pinmux", - .pctlops = &rt2880_pctrl_ops, - .pmxops = &rt2880_pmx_group_ops, + .name = "ralink-pinmux", + .pctlops = &ralink_pctrl_ops, + .pmxops = &ralink_pmx_group_ops, }; -static struct rt2880_pmx_func gpio_func = { +static struct ralink_pmx_func gpio_func = { .name = "gpio", }; -static int rt2880_pinmux_index(struct rt2880_priv *p) +static int ralink_pinmux_index(struct ralink_priv *p) { - struct rt2880_pmx_group *mux = p->groups; + struct ralink_pmx_group *mux = p->groups; int i, j, c = 0; /* count the mux functions */ @@ -248,7 +248,7 @@ static int rt2880_pinmux_index(struct rt2880_priv *p) return 0; } -static int rt2880_pinmux_pins(struct rt2880_priv *p) +static int ralink_pinmux_pins(struct ralink_priv *p) { int i, j; @@ -311,10 +311,10 @@ static int rt2880_pinmux_pins(struct rt2880_priv *p) return 0; } -int rt2880_pinmux_init(struct platform_device *pdev, - struct rt2880_pmx_group *data) +int ralink_pinmux_init(struct platform_device *pdev, + struct ralink_pmx_group *data) { - struct rt2880_priv *p; + struct ralink_priv *p; struct pinctrl_dev *dev; int err; @@ -322,23 +322,23 @@ int rt2880_pinmux_init(struct platform_device *pdev, return -ENOTSUPP; /* setup the private data */ - p = devm_kzalloc(&pdev->dev, sizeof(struct rt2880_priv), GFP_KERNEL); + p = devm_kzalloc(&pdev->dev, sizeof(struct ralink_priv), GFP_KERNEL); if (!p) return -ENOMEM; p->dev = &pdev->dev; - p->desc = &rt2880_pctrl_desc; + p->desc = &ralink_pctrl_desc; p->groups = data; platform_set_drvdata(pdev, p); /* init the device */ - err = rt2880_pinmux_index(p); + err = ralink_pinmux_index(p); if (err) { dev_err(&pdev->dev, "failed to load index\n"); return err; } - err = rt2880_pinmux_pins(p); + err = ralink_pinmux_pins(p); if (err) { dev_err(&pdev->dev, "failed to load pins\n"); return err; diff --git a/drivers/pinctrl/ralink/pinmux.h b/drivers/pinctrl/ralink/pinctrl-ralink.h similarity index 75% rename from drivers/pinctrl/ralink/pinmux.h rename to drivers/pinctrl/ralink/pinctrl-ralink.h index 0046abe3bcc798..13496940958527 100644 --- a/drivers/pinctrl/ralink/pinmux.h +++ b/drivers/pinctrl/ralink/pinctrl-ralink.h @@ -3,8 +3,8 @@ * Copyright (C) 2012 John Crispin */ -#ifndef _RT288X_PINMUX_H__ -#define _RT288X_PINMUX_H__ +#ifndef _PINCTRL_RALINK_H__ +#define _PINCTRL_RALINK_H__ #define FUNC(name, value, pin_first, pin_count) \ { name, value, pin_first, pin_count } @@ -19,9 +19,9 @@ .func = _func, .gpio = _gpio, \ .func_count = ARRAY_SIZE(_func) } -struct rt2880_pmx_group; +struct ralink_pmx_group; -struct rt2880_pmx_func { +struct ralink_pmx_func { const char *name; const char value; @@ -35,7 +35,7 @@ struct rt2880_pmx_func { int enabled; }; -struct rt2880_pmx_group { +struct ralink_pmx_group { const char *name; int enabled; @@ -43,11 +43,11 @@ struct rt2880_pmx_group { const char mask; const char gpio; - struct rt2880_pmx_func *func; + struct ralink_pmx_func *func; int func_count; }; -int rt2880_pinmux_init(struct platform_device *pdev, - struct rt2880_pmx_group *data); +int ralink_pinmux_init(struct platform_device *pdev, + struct ralink_pmx_group *data); #endif diff --git a/drivers/pinctrl/ralink/pinctrl-rt288x.c b/drivers/pinctrl/ralink/pinctrl-rt288x.c index 0744aebbace529..40c45140ff8a39 100644 --- a/drivers/pinctrl/ralink/pinctrl-rt288x.c +++ b/drivers/pinctrl/ralink/pinctrl-rt288x.c @@ -4,7 +4,7 @@ #include #include #include -#include "pinmux.h" +#include "pinctrl-ralink.h" #define RT2880_GPIO_MODE_I2C BIT(0) #define RT2880_GPIO_MODE_UART0 BIT(1) @@ -15,15 +15,15 @@ #define RT2880_GPIO_MODE_SDRAM BIT(6) #define RT2880_GPIO_MODE_PCI BIT(7) -static struct rt2880_pmx_func i2c_func[] = { FUNC("i2c", 0, 1, 2) }; -static struct rt2880_pmx_func spi_func[] = { FUNC("spi", 0, 3, 4) }; -static struct rt2880_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 7, 8) }; -static struct rt2880_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) }; -static struct rt2880_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) }; -static struct rt2880_pmx_func sdram_func[] = { FUNC("sdram", 0, 24, 16) }; -static struct rt2880_pmx_func pci_func[] = { FUNC("pci", 0, 40, 32) }; +static struct ralink_pmx_func i2c_func[] = { FUNC("i2c", 0, 1, 2) }; +static struct ralink_pmx_func spi_func[] = { FUNC("spi", 0, 3, 4) }; +static struct ralink_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 7, 8) }; +static struct ralink_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) }; +static struct ralink_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) }; +static struct ralink_pmx_func sdram_func[] = { FUNC("sdram", 0, 24, 16) }; +static struct ralink_pmx_func pci_func[] = { FUNC("pci", 0, 40, 32) }; -static struct rt2880_pmx_group rt2880_pinmux_data_act[] = { +static struct ralink_pmx_group rt2880_pinmux_data_act[] = { GRP("i2c", i2c_func, 1, RT2880_GPIO_MODE_I2C), GRP("spi", spi_func, 1, RT2880_GPIO_MODE_SPI), GRP("uartlite", uartlite_func, 1, RT2880_GPIO_MODE_UART0), @@ -36,7 +36,7 @@ static struct rt2880_pmx_group rt2880_pinmux_data_act[] = { static int rt288x_pinmux_probe(struct platform_device *pdev) { - return rt2880_pinmux_init(pdev, rt2880_pinmux_data_act); + return ralink_pinmux_init(pdev, rt2880_pinmux_data_act); } static const struct of_device_id rt288x_pinmux_match[] = { diff --git a/drivers/pinctrl/ralink/pinctrl-rt305x.c b/drivers/pinctrl/ralink/pinctrl-rt305x.c index 5d8fa156c0037b..25527ca1ccaae5 100644 --- a/drivers/pinctrl/ralink/pinctrl-rt305x.c +++ b/drivers/pinctrl/ralink/pinctrl-rt305x.c @@ -5,7 +5,7 @@ #include #include #include -#include "pinmux.h" +#include "pinctrl-ralink.h" #define RT305X_GPIO_MODE_UART0_SHIFT 2 #define RT305X_GPIO_MODE_UART0_MASK 0x7 @@ -31,9 +31,9 @@ #define RT3352_GPIO_MODE_LNA 18 #define RT3352_GPIO_MODE_PA 20 -static struct rt2880_pmx_func i2c_func[] = { FUNC("i2c", 0, 1, 2) }; -static struct rt2880_pmx_func spi_func[] = { FUNC("spi", 0, 3, 4) }; -static struct rt2880_pmx_func uartf_func[] = { +static struct ralink_pmx_func i2c_func[] = { FUNC("i2c", 0, 1, 2) }; +static struct ralink_pmx_func spi_func[] = { FUNC("spi", 0, 3, 4) }; +static struct ralink_pmx_func uartf_func[] = { FUNC("uartf", RT305X_GPIO_MODE_UARTF, 7, 8), FUNC("pcm uartf", RT305X_GPIO_MODE_PCM_UARTF, 7, 8), FUNC("pcm i2s", RT305X_GPIO_MODE_PCM_I2S, 7, 8), @@ -42,28 +42,28 @@ static struct rt2880_pmx_func uartf_func[] = { FUNC("gpio uartf", RT305X_GPIO_MODE_GPIO_UARTF, 7, 4), FUNC("gpio i2s", RT305X_GPIO_MODE_GPIO_I2S, 7, 4), }; -static struct rt2880_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 15, 2) }; -static struct rt2880_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) }; -static struct rt2880_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) }; -static struct rt2880_pmx_func rt5350_led_func[] = { FUNC("led", 0, 22, 5) }; -static struct rt2880_pmx_func rt5350_cs1_func[] = { +static struct ralink_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 15, 2) }; +static struct ralink_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) }; +static struct ralink_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) }; +static struct ralink_pmx_func rt5350_led_func[] = { FUNC("led", 0, 22, 5) }; +static struct ralink_pmx_func rt5350_cs1_func[] = { FUNC("spi_cs1", 0, 27, 1), FUNC("wdg_cs1", 1, 27, 1), }; -static struct rt2880_pmx_func sdram_func[] = { FUNC("sdram", 0, 24, 16) }; -static struct rt2880_pmx_func rt3352_rgmii_func[] = { +static struct ralink_pmx_func sdram_func[] = { FUNC("sdram", 0, 24, 16) }; +static struct ralink_pmx_func rt3352_rgmii_func[] = { FUNC("rgmii", 0, 24, 12) }; -static struct rt2880_pmx_func rgmii_func[] = { FUNC("rgmii", 0, 40, 12) }; -static struct rt2880_pmx_func rt3352_lna_func[] = { FUNC("lna", 0, 36, 2) }; -static struct rt2880_pmx_func rt3352_pa_func[] = { FUNC("pa", 0, 38, 2) }; -static struct rt2880_pmx_func rt3352_led_func[] = { FUNC("led", 0, 40, 5) }; -static struct rt2880_pmx_func rt3352_cs1_func[] = { +static struct ralink_pmx_func rgmii_func[] = { FUNC("rgmii", 0, 40, 12) }; +static struct ralink_pmx_func rt3352_lna_func[] = { FUNC("lna", 0, 36, 2) }; +static struct ralink_pmx_func rt3352_pa_func[] = { FUNC("pa", 0, 38, 2) }; +static struct ralink_pmx_func rt3352_led_func[] = { FUNC("led", 0, 40, 5) }; +static struct ralink_pmx_func rt3352_cs1_func[] = { FUNC("spi_cs1", 0, 45, 1), FUNC("wdg_cs1", 1, 45, 1), }; -static struct rt2880_pmx_group rt3050_pinmux_data[] = { +static struct ralink_pmx_group rt3050_pinmux_data[] = { GRP("i2c", i2c_func, 1, RT305X_GPIO_MODE_I2C), GRP("spi", spi_func, 1, RT305X_GPIO_MODE_SPI), GRP("uartf", uartf_func, RT305X_GPIO_MODE_UART0_MASK, @@ -76,7 +76,7 @@ static struct rt2880_pmx_group rt3050_pinmux_data[] = { { 0 } }; -static struct rt2880_pmx_group rt3352_pinmux_data[] = { +static struct ralink_pmx_group rt3352_pinmux_data[] = { GRP("i2c", i2c_func, 1, RT305X_GPIO_MODE_I2C), GRP("spi", spi_func, 1, RT305X_GPIO_MODE_SPI), GRP("uartf", uartf_func, RT305X_GPIO_MODE_UART0_MASK, @@ -92,7 +92,7 @@ static struct rt2880_pmx_group rt3352_pinmux_data[] = { { 0 } }; -static struct rt2880_pmx_group rt5350_pinmux_data[] = { +static struct ralink_pmx_group rt5350_pinmux_data[] = { GRP("i2c", i2c_func, 1, RT305X_GPIO_MODE_I2C), GRP("spi", spi_func, 1, RT305X_GPIO_MODE_SPI), GRP("uartf", uartf_func, RT305X_GPIO_MODE_UART0_MASK, @@ -107,11 +107,11 @@ static struct rt2880_pmx_group rt5350_pinmux_data[] = { static int rt305x_pinmux_probe(struct platform_device *pdev) { if (soc_is_rt5350()) - return rt2880_pinmux_init(pdev, rt5350_pinmux_data); + return ralink_pinmux_init(pdev, rt5350_pinmux_data); else if (soc_is_rt305x() || soc_is_rt3350()) - return rt2880_pinmux_init(pdev, rt3050_pinmux_data); + return ralink_pinmux_init(pdev, rt3050_pinmux_data); else if (soc_is_rt3352()) - return rt2880_pinmux_init(pdev, rt3352_pinmux_data); + return ralink_pinmux_init(pdev, rt3352_pinmux_data); else return -EINVAL; } diff --git a/drivers/pinctrl/ralink/pinctrl-rt3883.c b/drivers/pinctrl/ralink/pinctrl-rt3883.c index 3e0e1b4caa647e..0b8674dbe18800 100644 --- a/drivers/pinctrl/ralink/pinctrl-rt3883.c +++ b/drivers/pinctrl/ralink/pinctrl-rt3883.c @@ -3,7 +3,7 @@ #include #include #include -#include "pinmux.h" +#include "pinctrl-ralink.h" #define RT3883_GPIO_MODE_UART0_SHIFT 2 #define RT3883_GPIO_MODE_UART0_MASK 0x7 @@ -39,9 +39,9 @@ #define RT3883_GPIO_MODE_LNA_G_GPIO 0x3 #define RT3883_GPIO_MODE_LNA_G _RT3883_GPIO_MODE_LNA_G(RT3883_GPIO_MODE_LNA_G_MASK) -static struct rt2880_pmx_func i2c_func[] = { FUNC("i2c", 0, 1, 2) }; -static struct rt2880_pmx_func spi_func[] = { FUNC("spi", 0, 3, 4) }; -static struct rt2880_pmx_func uartf_func[] = { +static struct ralink_pmx_func i2c_func[] = { FUNC("i2c", 0, 1, 2) }; +static struct ralink_pmx_func spi_func[] = { FUNC("spi", 0, 3, 4) }; +static struct ralink_pmx_func uartf_func[] = { FUNC("uartf", RT3883_GPIO_MODE_UARTF, 7, 8), FUNC("pcm uartf", RT3883_GPIO_MODE_PCM_UARTF, 7, 8), FUNC("pcm i2s", RT3883_GPIO_MODE_PCM_I2S, 7, 8), @@ -50,21 +50,21 @@ static struct rt2880_pmx_func uartf_func[] = { FUNC("gpio uartf", RT3883_GPIO_MODE_GPIO_UARTF, 7, 4), FUNC("gpio i2s", RT3883_GPIO_MODE_GPIO_I2S, 7, 4), }; -static struct rt2880_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 15, 2) }; -static struct rt2880_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) }; -static struct rt2880_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) }; -static struct rt2880_pmx_func lna_a_func[] = { FUNC("lna a", 0, 32, 3) }; -static struct rt2880_pmx_func lna_g_func[] = { FUNC("lna g", 0, 35, 3) }; -static struct rt2880_pmx_func pci_func[] = { +static struct ralink_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 15, 2) }; +static struct ralink_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) }; +static struct ralink_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) }; +static struct ralink_pmx_func lna_a_func[] = { FUNC("lna a", 0, 32, 3) }; +static struct ralink_pmx_func lna_g_func[] = { FUNC("lna g", 0, 35, 3) }; +static struct ralink_pmx_func pci_func[] = { FUNC("pci-dev", 0, 40, 32), FUNC("pci-host2", 1, 40, 32), FUNC("pci-host1", 2, 40, 32), FUNC("pci-fnc", 3, 40, 32) }; -static struct rt2880_pmx_func ge1_func[] = { FUNC("ge1", 0, 72, 12) }; -static struct rt2880_pmx_func ge2_func[] = { FUNC("ge2", 0, 84, 12) }; +static struct ralink_pmx_func ge1_func[] = { FUNC("ge1", 0, 72, 12) }; +static struct ralink_pmx_func ge2_func[] = { FUNC("ge2", 0, 84, 12) }; -static struct rt2880_pmx_group rt3883_pinmux_data[] = { +static struct ralink_pmx_group rt3883_pinmux_data[] = { GRP("i2c", i2c_func, 1, RT3883_GPIO_MODE_I2C), GRP("spi", spi_func, 1, RT3883_GPIO_MODE_SPI), GRP("uartf", uartf_func, RT3883_GPIO_MODE_UART0_MASK, @@ -83,7 +83,7 @@ static struct rt2880_pmx_group rt3883_pinmux_data[] = { static int rt3883_pinmux_probe(struct platform_device *pdev) { - return rt2880_pinmux_init(pdev, rt3883_pinmux_data); + return ralink_pinmux_init(pdev, rt3883_pinmux_data); } static const struct of_device_id rt3883_pinmux_match[] = { From 44016a85419ca0d4f1e4d0127b330f8e4e2a57d0 Mon Sep 17 00:00:00 2001 From: William Dean Date: Sun, 10 Jul 2022 23:49:22 +0800 Subject: [PATCH 0787/1056] pinctrl: ralink: Check for null return of devm_kcalloc [ Upstream commit c3b821e8e406d5650e587b7ac624ac24e9b780a8 ] Because of the possible failure of the allocation, data->domains might be NULL pointer and will cause the dereference of the NULL pointer later. Therefore, it might be better to check it and directly return -ENOMEM without releasing data manually if fails, because the comment of the devm_kmalloc() says "Memory allocated with this function is automatically freed on driver detach.". Fixes: a86854d0c599b ("treewide: devm_kzalloc() -> devm_kcalloc()") Reported-by: Hacash Robot Signed-off-by: William Dean Link: https://lore.kernel.org/r/20220710154922.2610876-1-williamsukatube@163.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/ralink/pinctrl-ralink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/ralink/pinctrl-ralink.c b/drivers/pinctrl/ralink/pinctrl-ralink.c index 841f23f55c9508..3a8268a43d74a4 100644 --- a/drivers/pinctrl/ralink/pinctrl-ralink.c +++ b/drivers/pinctrl/ralink/pinctrl-ralink.c @@ -266,6 +266,8 @@ static int ralink_pinmux_pins(struct ralink_priv *p) p->func[i]->pin_count, sizeof(int), GFP_KERNEL); + if (!p->func[i]->pins) + return -ENOMEM; for (j = 0; j < p->func[i]->pin_count; j++) p->func[i]->pins[j] = p->func[i]->pin_first + j; From da3c256e2d0ebc87c7db0c605c9692b6f1722074 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Jul 2022 15:07:26 +0200 Subject: [PATCH 0788/1056] perf/core: Fix data race between perf_event_set_output() and perf_mmap_close() [ Upstream commit 68e3c69803dada336893640110cb87221bb01dcf ] Yang Jihing reported a race between perf_event_set_output() and perf_mmap_close(): CPU1 CPU2 perf_mmap_close(e2) if (atomic_dec_and_test(&e2->rb->mmap_count)) // 1 - > 0 detach_rest = true ioctl(e1, IOC_SET_OUTPUT, e2) perf_event_set_output(e1, e2) ... list_for_each_entry_rcu(e, &e2->rb->event_list, rb_entry) ring_buffer_attach(e, NULL); // e1 isn't yet added and // therefore not detached ring_buffer_attach(e1, e2->rb) list_add_rcu(&e1->rb_entry, &e2->rb->event_list) After this; e1 is attached to an unmapped rb and a subsequent perf_mmap() will loop forever more: again: mutex_lock(&e->mmap_mutex); if (event->rb) { ... if (!atomic_inc_not_zero(&e->rb->mmap_count)) { ... mutex_unlock(&e->mmap_mutex); goto again; } } The loop in perf_mmap_close() holds e2->mmap_mutex, while the attach in perf_event_set_output() holds e1->mmap_mutex. As such there is no serialization to avoid this race. Change perf_event_set_output() to take both e1->mmap_mutex and e2->mmap_mutex to alleviate that problem. Additionally, have the loop in perf_mmap() detach the rb directly, this avoids having to wait for the concurrent perf_mmap_close() to get around to doing it to make progress. Fixes: 9bb5d40cd93c ("perf: Fix mmap() accounting hole") Reported-by: Yang Jihong Signed-off-by: Peter Zijlstra (Intel) Tested-by: Yang Jihong Link: https://lkml.kernel.org/r/YsQ3jm2GR38SW7uD@worktop.programming.kicks-ass.net Signed-off-by: Sasha Levin --- kernel/events/core.c | 45 ++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index d7e05d9375602a..c6c7a4d8057337 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6355,10 +6355,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!atomic_inc_not_zero(&event->rb->mmap_count)) { /* - * Raced against perf_mmap_close() through - * perf_event_set_output(). Try again, hope for better - * luck. + * Raced against perf_mmap_close(); remove the + * event and try again. */ + ring_buffer_attach(event, NULL); mutex_unlock(&event->mmap_mutex); goto again; } @@ -11892,14 +11892,25 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, goto out; } +static void mutex_lock_double(struct mutex *a, struct mutex *b) +{ + if (b < a) + swap(a, b); + + mutex_lock(a); + mutex_lock_nested(b, SINGLE_DEPTH_NESTING); +} + static int perf_event_set_output(struct perf_event *event, struct perf_event *output_event) { struct perf_buffer *rb = NULL; int ret = -EINVAL; - if (!output_event) + if (!output_event) { + mutex_lock(&event->mmap_mutex); goto set; + } /* don't allow circular references */ if (event == output_event) @@ -11937,8 +11948,15 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) event->pmu != output_event->pmu) goto out; + /* + * Hold both mmap_mutex to serialize against perf_mmap_close(). Since + * output_event is already on rb->event_list, and the list iteration + * restarts after every removal, it is guaranteed this new event is + * observed *OR* if output_event is already removed, it's guaranteed we + * observe !rb->mmap_count. + */ + mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex); set: - mutex_lock(&event->mmap_mutex); /* Can't redirect output if we've got an active mmap() */ if (atomic_read(&event->mmap_count)) goto unlock; @@ -11948,6 +11966,12 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) rb = ring_buffer_get(output_event); if (!rb) goto unlock; + + /* did we race against perf_mmap_close() */ + if (!atomic_read(&rb->mmap_count)) { + ring_buffer_put(rb); + goto unlock; + } } ring_buffer_attach(event, rb); @@ -11955,20 +11979,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) ret = 0; unlock: mutex_unlock(&event->mmap_mutex); + if (output_event) + mutex_unlock(&output_event->mmap_mutex); out: return ret; } -static void mutex_lock_double(struct mutex *a, struct mutex *b) -{ - if (b < a) - swap(a, b); - - mutex_lock(a); - mutex_lock_nested(b, SINGLE_DEPTH_NESTING); -} - static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) { bool nmi_safe = false; From f52af853be7ac96ba8ced03c9136297fe5843b0b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Jan 2022 12:24:57 -0800 Subject: [PATCH 0789/1056] ipv4/tcp: do not use per netns ctl sockets [ Upstream commit 37ba017dcc3b1123206808979834655ddcf93251 ] TCP ipv4 uses per-cpu/per-netns ctl sockets in order to send RST and some ACK packets (on behalf of TIMEWAIT sockets). This adds memory and cpu costs, which do not seem needed. Now typical servers have 256 or more cores, this adds considerable tax to netns users. tcp sockets are used from BH context, are not receiving packets, and do not store any persistent state but the 'struct net' pointer in order to be able to use IPv4 output functions. Note that I attempted a related change in the past, that had to be hot-fixed in commit bdbbb8527b6f ("ipv4: tcp: get rid of ugly unicast_sock") This patch could very well surface old bugs, on layers not taking care of sk->sk_kern_sock properly. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/netns/ipv4.h | 1 - net/ipv4/tcp_ipv4.c | 61 ++++++++++++++++++---------------------- 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 6c5b2efc4f17d0..d60a10cfc38237 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -74,7 +74,6 @@ struct netns_ipv4 { struct sock *mc_autojoin_sk; struct inet_peer_base *peers; - struct sock * __percpu *tcp_sk; struct fqdir *fqdir; u8 sysctl_icmp_echo_ignore_all; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5d94822fd5069f..b9a9f288bfa600 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -91,6 +91,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); +static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk); + static u32 tcp_v4_init_seq(const struct sk_buff *skb) { return secure_tcp_seq(ip_hdr(skb)->daddr, @@ -807,7 +809,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) arg.tos = ip_hdr(skb)->tos; arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); - ctl_sk = this_cpu_read(*net->ipv4.tcp_sk); + ctl_sk = this_cpu_read(ipv4_tcp_sk); + sock_net_set(ctl_sk, net); if (sk) { ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : sk->sk_mark; @@ -822,6 +825,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) transmit_time); ctl_sk->sk_mark = 0; + sock_net_set(ctl_sk, &init_net); __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); local_bh_enable(); @@ -905,7 +909,8 @@ static void tcp_v4_send_ack(const struct sock *sk, arg.tos = tos; arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); local_bh_disable(); - ctl_sk = this_cpu_read(*net->ipv4.tcp_sk); + ctl_sk = this_cpu_read(ipv4_tcp_sk); + sock_net_set(ctl_sk, net); ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : sk->sk_mark; ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? @@ -918,6 +923,7 @@ static void tcp_v4_send_ack(const struct sock *sk, transmit_time); ctl_sk->sk_mark = 0; + sock_net_set(ctl_sk, &init_net); __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); local_bh_enable(); } @@ -3103,41 +3109,14 @@ EXPORT_SYMBOL(tcp_prot); static void __net_exit tcp_sk_exit(struct net *net) { - int cpu; - if (net->ipv4.tcp_congestion_control) bpf_module_put(net->ipv4.tcp_congestion_control, net->ipv4.tcp_congestion_control->owner); - - for_each_possible_cpu(cpu) - inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); - free_percpu(net->ipv4.tcp_sk); } static int __net_init tcp_sk_init(struct net *net) { - int res, cpu, cnt; - - net->ipv4.tcp_sk = alloc_percpu(struct sock *); - if (!net->ipv4.tcp_sk) - return -ENOMEM; - - for_each_possible_cpu(cpu) { - struct sock *sk; - - res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, - IPPROTO_TCP, net); - if (res) - goto fail; - sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); - - /* Please enforce IP_DF and IPID==0 for RST and - * ACK sent in SYN-RECV and TIME-WAIT state. - */ - inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; - - *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; - } + int cnt; net->ipv4.sysctl_tcp_ecn = 2; net->ipv4.sysctl_tcp_ecn_fallback = 1; @@ -3221,10 +3200,6 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.tcp_congestion_control = &tcp_reno; return 0; -fail: - tcp_sk_exit(net); - - return res; } static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) @@ -3318,6 +3293,24 @@ static void __init bpf_iter_register(void) void __init tcp_v4_init(void) { + int cpu, res; + + for_each_possible_cpu(cpu) { + struct sock *sk; + + res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, + IPPROTO_TCP, &init_net); + if (res) + panic("Failed to create the TCP control socket.\n"); + sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); + + /* Please enforce IP_DF and IPID==0 for RST and + * ACK sent in SYN-RECV and TIME-WAIT state. + */ + inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; + + per_cpu(ipv4_tcp_sk, cpu) = sk; + } if (register_pernet_subsys(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); From d2dba226acb9baed97a714e0f7f81a89b2feb63b Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Fri, 4 Mar 2022 06:55:06 -0800 Subject: [PATCH 0790/1056] net: tun: split run_ebpf_filter() and pskb_trim() into different "if statement" [ Upstream commit 45a15d89fbcd280571eba8e5ca309e14ba6afa8f ] No functional change. Just to split the if statement into different conditions to use kfree_skb_reason() to trace the reason later. Cc: Joao Martins Cc: Joe Jin Signed-off-by: Dongli Zhang Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/tun.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ea60453fe69aac..f92d6a12831fe5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1097,7 +1097,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) goto drop; len = run_ebpf_filter(tun, skb, len); - if (len == 0 || pskb_trim(skb, len)) + if (len == 0) + goto drop; + + if (pskb_trim(skb, len)) goto drop; if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) From e2b6c5f7ee95ba7f7ab374c8cc99b211b6be91e9 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 14 Jan 2022 14:07:17 -0800 Subject: [PATCH 0791/1056] mm/pagealloc: sysctl: change watermark_scale_factor max limit to 30% [ Upstream commit 39c65a94cd9661532be150e88f8b02f4a6844a35 ] For embedded systems with low total memory, having to run applications with relatively large memory requirements, 10% max limitation for watermark_scale_factor poses an issue of triggering direct reclaim every time such application is started. This results in slow application startup times and bad end-user experience. By increasing watermark_scale_factor max limit we allow vendors more flexibility to choose the right level of kswapd aggressiveness for their device and workload requirements. Link: https://lkml.kernel.org/r/20211124193604.2758863-1-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Lukas Middendorf Cc: Antti Palosaari Cc: Luis Chamberlain Cc: Kees Cook Cc: Iurii Zaikin Cc: Dave Hansen Cc: Vlastimil Babka Cc: Mel Gorman Cc: Jonathan Corbet Cc: Zhang Yi Cc: Fengfei Xi Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- Documentation/admin-guide/sysctl/vm.rst | 2 +- kernel/sysctl.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index 5e795202111f2f..f4804ce37c58b9 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -948,7 +948,7 @@ how much memory needs to be free before kswapd goes back to sleep. The unit is in fractions of 10,000. The default value of 10 means the distances between watermarks are 0.1% of the available memory in the -node/system. The maximum value is 1000, or 10% of memory. +node/system. The maximum value is 3000, or 30% of memory. A high rate of threads entering direct reclaim (allocstall) or kswapd going to sleep prematurely (kswapd_low_wmark_hit_quickly) can indicate diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 25c18b2df68486..347a90a878b3b6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -122,6 +122,7 @@ static unsigned long long_max = LONG_MAX; static int one_hundred = 100; static int two_hundred = 200; static int one_thousand = 1000; +static int three_thousand = 3000; #ifdef CONFIG_PRINTK static int ten_thousand = 10000; #endif @@ -2971,7 +2972,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = watermark_scale_factor_sysctl_handler, .extra1 = SYSCTL_ONE, - .extra2 = &one_thousand, + .extra2 = &three_thousand, }, { .procname = "percpu_pagelist_high_fraction", From 9cb4959493cd13ee4e5080cb98437649470e650c Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 21 Jan 2022 22:10:55 -0800 Subject: [PATCH 0792/1056] sysctl: move some boundary constants from sysctl.c to sysctl_vals [ Upstream commit 78e36f3b0dae586f623c4a37ec5eb5496f5abbe1 ] sysctl has helpers which let us specify boundary values for a min or max int value. Since these are used for a boundary check only they don't change, so move these variables to sysctl_vals to avoid adding duplicate variables. This will help with our cleanup of kernel/sysctl.c. [akpm@linux-foundation.org: update it for "mm/pagealloc: sysctl: change watermark_scale_factor max limit to 30%"] [mcgrof@kernel.org: major rebase] Link: https://lkml.kernel.org/r/20211123202347.818157-3-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Reviewed-by: Kees Cook Cc: Al Viro Cc: Amir Goldstein Cc: Andy Shevchenko Cc: Benjamin LaHaise Cc: "Eric W. Biederman" Cc: Greg Kroah-Hartman Cc: Iurii Zaikin Cc: Jan Kara Cc: Paul Turner Cc: Peter Zijlstra Cc: Petr Mladek Cc: Qing Wang Cc: Sebastian Reichel Cc: Sergey Senozhatsky Cc: Stephen Kitt Cc: Tetsuo Handa Cc: Antti Palosaari Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Clemens Ladisch Cc: David Airlie Cc: Jani Nikula Cc: Joel Becker Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Lukas Middendorf Cc: Mark Fasheh Cc: Phillip Potter Cc: Rodrigo Vivi Cc: Douglas Gilbert Cc: James E.J. Bottomley Cc: Jani Nikula Cc: John Ogness Cc: Martin K. Petersen Cc: "Rafael J. Wysocki" Cc: Steven Rostedt (VMware) Cc: Suren Baghdasaryan Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/proc/proc_sysctl.c | 2 +- include/linux/sysctl.h | 13 +++++++++--- kernel/sysctl.c | 45 ++++++++++++++++++------------------------ 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5d66faecd4ef06..013fc5931bc376 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -25,7 +25,7 @@ static const struct file_operations proc_sys_dir_file_operations; static const struct inode_operations proc_sys_dir_operations; /* shared constants to be used in various sysctls */ -const int sysctl_vals[] = { 0, 1, INT_MAX }; +const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 200, 1000, 3000, INT_MAX }; EXPORT_SYMBOL(sysctl_vals); /* Support for permanently empty directories */ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 1fa2b69c6fc3d2..fa372b4c231322 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -38,9 +38,16 @@ struct ctl_table_header; struct ctl_dir; /* Keep the same order as in fs/proc/proc_sysctl.c */ -#define SYSCTL_ZERO ((void *)&sysctl_vals[0]) -#define SYSCTL_ONE ((void *)&sysctl_vals[1]) -#define SYSCTL_INT_MAX ((void *)&sysctl_vals[2]) +#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[0]) +#define SYSCTL_ZERO ((void *)&sysctl_vals[1]) +#define SYSCTL_ONE ((void *)&sysctl_vals[2]) +#define SYSCTL_TWO ((void *)&sysctl_vals[3]) +#define SYSCTL_FOUR ((void *)&sysctl_vals[4]) +#define SYSCTL_ONE_HUNDRED ((void *)&sysctl_vals[5]) +#define SYSCTL_TWO_HUNDRED ((void *)&sysctl_vals[6]) +#define SYSCTL_ONE_THOUSAND ((void *)&sysctl_vals[7]) +#define SYSCTL_THREE_THOUSAND ((void *)&sysctl_vals[8]) +#define SYSCTL_INT_MAX ((void *)&sysctl_vals[9]) extern const int sysctl_vals[]; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 347a90a878b3b6..23c08bf3db58cf 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -113,16 +113,9 @@ static int sixty = 60; #endif -static int __maybe_unused neg_one = -1; -static int __maybe_unused two = 2; -static int __maybe_unused four = 4; static unsigned long zero_ul; static unsigned long one_ul = 1; static unsigned long long_max = LONG_MAX; -static int one_hundred = 100; -static int two_hundred = 200; -static int one_thousand = 1000; -static int three_thousand = 3000; #ifdef CONFIG_PRINTK static int ten_thousand = 10000; #endif @@ -1973,7 +1966,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &neg_one, + .extra1 = SYSCTL_NEG_ONE, .extra2 = SYSCTL_ONE, }, #endif @@ -2315,7 +2308,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax_sysadmin, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, #endif { @@ -2575,7 +2568,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &neg_one, + .extra1 = SYSCTL_NEG_ONE, }, #endif #ifdef CONFIG_RT_MUTEXES @@ -2637,7 +2630,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = perf_cpu_time_max_percent_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &one_hundred, + .extra2 = SYSCTL_ONE_HUNDRED, }, { .procname = "perf_event_max_stack", @@ -2655,7 +2648,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = perf_event_max_stack_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &one_thousand, + .extra2 = SYSCTL_ONE_THOUSAND, }, #endif { @@ -2686,7 +2679,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = bpf_unpriv_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { .procname = "bpf_stats_enabled", @@ -2740,7 +2733,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = overcommit_policy_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { .procname = "panic_on_oom", @@ -2749,7 +2742,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { .procname = "oom_kill_allocating_task", @@ -2794,7 +2787,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = dirty_background_ratio_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &one_hundred, + .extra2 = SYSCTL_ONE_HUNDRED, }, { .procname = "dirty_background_bytes", @@ -2811,7 +2804,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = dirty_ratio_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &one_hundred, + .extra2 = SYSCTL_ONE_HUNDRED, }, { .procname = "dirty_bytes", @@ -2851,7 +2844,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two_hundred, + .extra2 = SYSCTL_TWO_HUNDRED, }, #ifdef CONFIG_NUMA { @@ -2910,7 +2903,7 @@ static struct ctl_table vm_table[] = { .mode = 0200, .proc_handler = drop_caches_sysctl_handler, .extra1 = SYSCTL_ONE, - .extra2 = &four, + .extra2 = SYSCTL_FOUR, }, #ifdef CONFIG_COMPACTION { @@ -2927,7 +2920,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = compaction_proactiveness_sysctl_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &one_hundred, + .extra2 = SYSCTL_ONE_HUNDRED, }, { .procname = "extfrag_threshold", @@ -2972,7 +2965,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = watermark_scale_factor_sysctl_handler, .extra1 = SYSCTL_ONE, - .extra2 = &three_thousand, + .extra2 = SYSCTL_THREE_THOUSAND, }, { .procname = "percpu_pagelist_high_fraction", @@ -3051,7 +3044,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &one_hundred, + .extra2 = SYSCTL_ONE_HUNDRED, }, { .procname = "min_slab_ratio", @@ -3060,7 +3053,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = sysctl_min_slab_ratio_sysctl_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &one_hundred, + .extra2 = SYSCTL_ONE_HUNDRED, }, #endif #ifdef CONFIG_SMP @@ -3350,7 +3343,7 @@ static struct ctl_table fs_table[] = { .mode = 0600, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { .procname = "protected_regular", @@ -3359,7 +3352,7 @@ static struct ctl_table fs_table[] = { .mode = 0600, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { .procname = "suid_dumpable", @@ -3368,7 +3361,7 @@ static struct ctl_table fs_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax_coredump, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) { From d1e0bbe081715827b7bf8d71d805ec49039c8fcd Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:30 -0700 Subject: [PATCH 0793/1056] tcp: Fix data-races around sysctl_tcp_ecn. [ Upstream commit 4785a66702f086cf2ea84bdbe6ec921f274bd9f2 ] While reading sysctl_tcp_ecn, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 2 +- net/ipv4/syncookies.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 ++ net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 2 +- 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 4af5561cbfc54c..7c760aa655404a 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1392,7 +1392,7 @@ static void chtls_pass_accept_request(struct sock *sk, th_ecn = tcph->ece && tcph->cwr; if (th_ecn) { ect = !INET_ECN_is_not_ect(ip_dsfield); - ecn_ok = sock_net(sk)->ipv4.sysctl_tcp_ecn; + ecn_ok = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn); if ((!ect && ecn_ok) || tcp_ca_needs_ecn(sk)) inet_rsk(oreq)->ecn_ok = 1; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 10b469aee4920c..fd1dc86ba5124d 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -275,7 +275,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, if (!ecn_ok) return false; - if (net->ipv4.sysctl_tcp_ecn) + if (READ_ONCE(net->ipv4.sysctl_tcp_ecn)) return true; return dst_feature(dst, RTAX_FEATURE_ECN); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 616658e7c79682..ead5db7e24ea5a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -689,6 +689,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, }, { .procname = "tcp_ecn_fallback", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0ff2f620f8e44f..ae06923fe8d056 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6669,7 +6669,7 @@ static void tcp_ecn_create_request(struct request_sock *req, ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK); - ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst; + ecn_ok = READ_ONCE(net->ipv4.sysctl_tcp_ecn) || ecn_ok_dst; if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || (ecn_ok_dst & DST_FEATURE_ECN_CA) || diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 509aab1b7ac994..0bd5c334ccceec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -324,7 +324,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); - bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || + bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 || tcp_ca_needs_ecn(sk) || bpf_needs_ecn; if (!use_ecn) { From 734d5ce02cb069cccedc993d8f1dc0ea41cfa3dd Mon Sep 17 00:00:00 2001 From: Jude Shih Date: Sun, 25 Jul 2021 13:55:02 +0800 Subject: [PATCH 0794/1056] drm/amd/display: Support for DMUB HPD interrupt handling [ Upstream commit e27c41d5b0681c597ac1894f4e02cf626e062250 ] [WHY] To add support for HPD interrupt handling from DMUB. HPD interrupt could be triggered from outbox1 from DMUB [HOW] 1) Use queue_work to handle hpd task from outbox1 2) Add handle_hpd_irq_helper to share interrupt handling code between legacy and DMUB HPD from outbox1 3) Added DMUB HPD handling in dmub_srv_stat_get_notification(). HPD handling callback function and wake up the DMUB thread. Reviewed-by: Nicholas Kazlauskas Acked-by: Qingqing Zhuo Signed-off-by: Jude Shih Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 171 +++++++++++++++++- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 40 ++++ 2 files changed, 203 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d35a6f6d158eab..19048f0d83a479 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -215,6 +215,8 @@ static void handle_cursor_update(struct drm_plane *plane, static const struct drm_format_info * amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd); +static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector); + static bool is_timing_unchanged_for_freesync(struct drm_crtc_state *old_crtc_state, struct drm_crtc_state *new_crtc_state); @@ -618,6 +620,116 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params) } #endif +/** + * dmub_aux_setconfig_reply_callback - Callback for AUX or SET_CONFIG command. + * @adev: amdgpu_device pointer + * @notify: dmub notification structure + * + * Dmub AUX or SET_CONFIG command completion processing callback + * Copies dmub notification to DM which is to be read by AUX command. + * issuing thread and also signals the event to wake up the thread. + */ +void dmub_aux_setconfig_callback(struct amdgpu_device *adev, struct dmub_notification *notify) +{ + if (adev->dm.dmub_notify) + memcpy(adev->dm.dmub_notify, notify, sizeof(struct dmub_notification)); + if (notify->type == DMUB_NOTIFICATION_AUX_REPLY) + complete(&adev->dm.dmub_aux_transfer_done); +} + +/** + * dmub_hpd_callback - DMUB HPD interrupt processing callback. + * @adev: amdgpu_device pointer + * @notify: dmub notification structure + * + * Dmub Hpd interrupt processing callback. Gets displayindex through the + * ink index and calls helper to do the processing. + */ +void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification *notify) +{ + struct amdgpu_dm_connector *aconnector; + struct drm_connector *connector; + struct drm_connector_list_iter iter; + struct dc_link *link; + uint8_t link_index = 0; + struct drm_device *dev = adev->dm.ddev; + + if (adev == NULL) + return; + + if (notify == NULL) { + DRM_ERROR("DMUB HPD callback notification was NULL"); + return; + } + + if (notify->link_index > adev->dm.dc->link_count) { + DRM_ERROR("DMUB HPD index (%u)is abnormal", notify->link_index); + return; + } + + drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); + + link_index = notify->link_index; + + link = adev->dm.dc->links[link_index]; + + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { + aconnector = to_amdgpu_dm_connector(connector); + if (link && aconnector->dc_link == link) { + DRM_INFO("DMUB HPD callback: link_index=%u\n", link_index); + handle_hpd_irq_helper(aconnector); + break; + } + } + drm_connector_list_iter_end(&iter); + drm_modeset_unlock(&dev->mode_config.connection_mutex); + +} + +/** + * register_dmub_notify_callback - Sets callback for DMUB notify + * @adev: amdgpu_device pointer + * @type: Type of dmub notification + * @callback: Dmub interrupt callback function + * @dmub_int_thread_offload: offload indicator + * + * API to register a dmub callback handler for a dmub notification + * Also sets indicator whether callback processing to be offloaded. + * to dmub interrupt handling thread + * Return: true if successfully registered, false if there is existing registration + */ +bool register_dmub_notify_callback(struct amdgpu_device *adev, enum dmub_notification_type type, +dmub_notify_interrupt_callback_t callback, bool dmub_int_thread_offload) +{ + if (callback != NULL && type < ARRAY_SIZE(adev->dm.dmub_thread_offload)) { + adev->dm.dmub_callback[type] = callback; + adev->dm.dmub_thread_offload[type] = dmub_int_thread_offload; + } else + return false; + + return true; +} + +static void dm_handle_hpd_work(struct work_struct *work) +{ + struct dmub_hpd_work *dmub_hpd_wrk; + + dmub_hpd_wrk = container_of(work, struct dmub_hpd_work, handle_hpd_work); + + if (!dmub_hpd_wrk->dmub_notify) { + DRM_ERROR("dmub_hpd_wrk dmub_notify is NULL"); + return; + } + + if (dmub_hpd_wrk->dmub_notify->type < ARRAY_SIZE(dmub_hpd_wrk->adev->dm.dmub_callback)) { + dmub_hpd_wrk->adev->dm.dmub_callback[dmub_hpd_wrk->dmub_notify->type](dmub_hpd_wrk->adev, + dmub_hpd_wrk->dmub_notify); + } + kfree(dmub_hpd_wrk); + +} + #define DMUB_TRACE_MAX_READ 64 /** * dm_dmub_outbox1_low_irq() - Handles Outbox interrupt @@ -634,18 +746,33 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) struct amdgpu_display_manager *dm = &adev->dm; struct dmcub_trace_buf_entry entry = { 0 }; uint32_t count = 0; + struct dmub_hpd_work *dmub_hpd_wrk; if (dc_enable_dmub_notifications(adev->dm.dc)) { + dmub_hpd_wrk = kzalloc(sizeof(*dmub_hpd_wrk), GFP_ATOMIC); + if (!dmub_hpd_wrk) { + DRM_ERROR("Failed to allocate dmub_hpd_wrk"); + return; + } + INIT_WORK(&dmub_hpd_wrk->handle_hpd_work, dm_handle_hpd_work); + if (irq_params->irq_src == DC_IRQ_SOURCE_DMCUB_OUTBOX) { do { dc_stat_get_dmub_notification(adev->dm.dc, ¬ify); - } while (notify.pending_notification); + if (notify.type > ARRAY_SIZE(dm->dmub_thread_offload)) { + DRM_ERROR("DM: notify type %d larger than the array size %ld !", notify.type, + ARRAY_SIZE(dm->dmub_thread_offload)); + continue; + } + if (dm->dmub_thread_offload[notify.type] == true) { + dmub_hpd_wrk->dmub_notify = ¬ify; + dmub_hpd_wrk->adev = adev; + queue_work(adev->dm.delayed_hpd_wq, &dmub_hpd_wrk->handle_hpd_work); + } else { + dm->dmub_callback[notify.type](adev, ¬ify); + } - if (adev->dm.dmub_notify) - memcpy(adev->dm.dmub_notify, ¬ify, sizeof(struct dmub_notification)); - if (notify.type == DMUB_NOTIFICATION_AUX_REPLY) - complete(&adev->dm.dmub_aux_transfer_done); - // TODO : HPD Implementation + } while (notify.pending_notification); } else { DRM_ERROR("DM: Failed to receive correct outbox IRQ !"); @@ -1287,7 +1414,25 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) DRM_INFO("amdgpu: fail to allocate adev->dm.dmub_notify"); goto error; } + + adev->dm.delayed_hpd_wq = create_singlethread_workqueue("amdgpu_dm_hpd_wq"); + if (!adev->dm.delayed_hpd_wq) { + DRM_ERROR("amdgpu: failed to create hpd offload workqueue.\n"); + goto error; + } + amdgpu_dm_outbox_init(adev); +#if defined(CONFIG_DRM_AMD_DC_DCN) + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_AUX_REPLY, + dmub_aux_setconfig_callback, false)) { + DRM_ERROR("amdgpu: fail to register dmub aux callback"); + goto error; + } + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) { + DRM_ERROR("amdgpu: fail to register dmub hpd callback"); + goto error; + } +#endif } if (amdgpu_dm_initialize_drm_device(adev)) { @@ -1369,6 +1514,8 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) if (dc_enable_dmub_notifications(adev->dm.dc)) { kfree(adev->dm.dmub_notify); adev->dm.dmub_notify = NULL; + destroy_workqueue(adev->dm.delayed_hpd_wq); + adev->dm.delayed_hpd_wq = NULL; } if (adev->dm.dmub_bo) @@ -2654,9 +2801,8 @@ void amdgpu_dm_update_connector_after_detect( dc_sink_release(sink); } -static void handle_hpd_irq(void *param) +static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector) { - struct amdgpu_dm_connector *aconnector = (struct amdgpu_dm_connector *)param; struct drm_connector *connector = &aconnector->base; struct drm_device *dev = connector->dev; enum dc_connection_type new_connection_type = dc_connection_none; @@ -2715,6 +2861,15 @@ static void handle_hpd_irq(void *param) } +static void handle_hpd_irq(void *param) +{ + struct amdgpu_dm_connector *aconnector = (struct amdgpu_dm_connector *)param; + + handle_hpd_irq_helper(aconnector); + +} + + static void dm_handle_hpd_rx_irq(struct amdgpu_dm_connector *aconnector) { uint8_t esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 }; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 46d6e65f6bd481..da87ca77023d65 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -47,6 +47,8 @@ #define AMDGPU_DM_MAX_CRTC 6 #define AMDGPU_DM_MAX_NUM_EDP 2 + +#define AMDGPU_DMUB_NOTIFICATION_MAX 5 /* #include "include/amdgpu_dal_power_if.h" #include "amdgpu_dm_irq.h" @@ -86,6 +88,21 @@ struct dm_compressor_info { uint64_t gpu_addr; }; +typedef void (*dmub_notify_interrupt_callback_t)(struct amdgpu_device *adev, struct dmub_notification *notify); + +/** + * struct dmub_hpd_work - Handle time consuming work in low priority outbox IRQ + * + * @handle_hpd_work: Work to be executed in a separate thread to handle hpd_low_irq + * @dmub_notify: notification for callback function + * @adev: amdgpu_device pointer + */ +struct dmub_hpd_work { + struct work_struct handle_hpd_work; + struct dmub_notification *dmub_notify; + struct amdgpu_device *adev; +}; + /** * struct vblank_control_work - Work data for vblank control * @work: Kernel work data for the work event @@ -190,8 +207,30 @@ struct amdgpu_display_manager { */ struct dmub_srv *dmub_srv; + /** + * @dmub_notify: + * + * Notification from DMUB. + */ + struct dmub_notification *dmub_notify; + /** + * @dmub_callback: + * + * Callback functions to handle notification from DMUB. + */ + + dmub_notify_interrupt_callback_t dmub_callback[AMDGPU_DMUB_NOTIFICATION_MAX]; + + /** + * @dmub_thread_offload: + * + * Flag to indicate if callback is offload. + */ + + bool dmub_thread_offload[AMDGPU_DMUB_NOTIFICATION_MAX]; + /** * @dmub_fb_info: * @@ -439,6 +478,7 @@ struct amdgpu_display_manager { */ struct list_head da_list; struct completion dmub_aux_transfer_done; + struct workqueue_struct *delayed_hpd_wq; /** * @brightness: From 54740bc4b182c2522c6570c3fb0b96a912982bf3 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Fri, 13 Aug 2021 15:54:47 +0800 Subject: [PATCH 0795/1056] drm/amd/display: Add option to defer works of hpd_rx_irq [ Upstream commit 410ad92d7fecd30de7456c19e326e272c2153ff2 ] [Why & How] Due to some code flow constraints, we need to defer dc_lock needed works from dc_link_handle_hpd_rx_irq(). Thus, do following changes: * Change allow_hpd_rx_irq() from static to public * Change handle_automated_test() from static to public * Extract link lost handling flow out from dc_link_handle_hpd_rx_irq() and put those into a new function dc_link_dp_handle_link_loss() * Add one option parameter to decide whether defer works within dc_link_handle_hpd_rx_irq() Acked-by: Mikita Lipski Signed-off-by: Wayne Lin Reviewed-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 92 ++++++++++++------- drivers/gpu/drm/amd/display/dc/dc_link.h | 3 + 2 files changed, 63 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 05f81d44aa6c2d..9b6111eb9ca445 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2743,7 +2743,7 @@ void decide_link_settings(struct dc_stream_state *stream, } /*************************Short Pulse IRQ***************************/ -static bool allow_hpd_rx_irq(const struct dc_link *link) +bool dc_link_dp_allow_hpd_rx_irq(const struct dc_link *link) { /* * Don't handle RX IRQ unless one of following is met: @@ -3177,7 +3177,7 @@ static void dp_test_get_audio_test_data(struct dc_link *link, bool disable_video } } -static void handle_automated_test(struct dc_link *link) +void dc_link_dp_handle_automated_test(struct dc_link *link) { union test_request test_request; union test_response test_response; @@ -3226,17 +3226,50 @@ static void handle_automated_test(struct dc_link *link) sizeof(test_response)); } -bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd_irq_dpcd_data, bool *out_link_loss) +void dc_link_dp_handle_link_loss(struct dc_link *link) +{ + int i; + struct pipe_ctx *pipe_ctx; + + for (i = 0; i < MAX_PIPES; i++) { + pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link) + break; + } + + if (pipe_ctx == NULL || pipe_ctx->stream == NULL) + return; + + for (i = 0; i < MAX_PIPES; i++) { + pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off && + pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe) { + core_link_disable_stream(pipe_ctx); + } + } + + for (i = 0; i < MAX_PIPES; i++) { + pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off && + pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe) { + core_link_enable_stream(link->dc->current_state, pipe_ctx); + } + } +} + +static bool handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd_irq_dpcd_data, bool *out_link_loss, + bool defer_handling, bool *has_left_work) { union hpd_irq_data hpd_irq_dpcd_data = { { { {0} } } }; union device_service_irq device_service_clear = { { 0 } }; enum dc_status result; bool status = false; - struct pipe_ctx *pipe_ctx; - int i; if (out_link_loss) *out_link_loss = false; + + if (has_left_work) + *has_left_work = false; /* For use cases related to down stream connection status change, * PSR and device auto test, refer to function handle_sst_hpd_irq * in DAL2.1*/ @@ -3268,11 +3301,14 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd &device_service_clear.raw, sizeof(device_service_clear.raw)); device_service_clear.raw = 0; - handle_automated_test(link); + if (defer_handling && has_left_work) + *has_left_work = true; + else + dc_link_dp_handle_automated_test(link); return false; } - if (!allow_hpd_rx_irq(link)) { + if (!dc_link_dp_allow_hpd_rx_irq(link)) { DC_LOG_HW_HPD_IRQ("%s: skipping HPD handling on %d\n", __func__, link->link_index); return false; @@ -3286,12 +3322,18 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd * so do not handle as a normal sink status change interrupt. */ - if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.UP_REQ_MSG_RDY) + if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.UP_REQ_MSG_RDY) { + if (defer_handling && has_left_work) + *has_left_work = true; return true; + } /* check if we have MST msg and return since we poll for it */ - if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.DOWN_REP_MSG_RDY) + if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.DOWN_REP_MSG_RDY) { + if (defer_handling && has_left_work) + *has_left_work = true; return false; + } /* For now we only handle 'Downstream port status' case. * If we got sink count changed it means @@ -3308,29 +3350,10 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd sizeof(hpd_irq_dpcd_data), "Status: "); - for (i = 0; i < MAX_PIPES; i++) { - pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link) - break; - } - - if (pipe_ctx == NULL || pipe_ctx->stream == NULL) - return false; - - - for (i = 0; i < MAX_PIPES; i++) { - pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off && - pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe) - core_link_disable_stream(pipe_ctx); - } - - for (i = 0; i < MAX_PIPES; i++) { - pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off && - pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe) - core_link_enable_stream(link->dc->current_state, pipe_ctx); - } + if (defer_handling && has_left_work) + *has_left_work = true; + else + dc_link_dp_handle_link_loss(link); status = false; if (out_link_loss) @@ -3356,6 +3379,11 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd return status; } +bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd_irq_dpcd_data, bool *out_link_loss) +{ + return handle_hpd_rx_irq(link, out_hpd_irq_dpcd_data, out_link_loss, false, NULL); +} + /*query dpcd for version and mst cap addresses*/ bool is_mst_supported(struct dc_link *link) { diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 83845d006c54a2..0efa2bc8639baf 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -308,6 +308,9 @@ bool dc_link_wait_for_t12(struct dc_link *link); enum dc_status read_hpd_rx_irq_data( struct dc_link *link, union hpd_irq_data *irq_data); +void dc_link_dp_handle_automated_test(struct dc_link *link); +void dc_link_dp_handle_link_loss(struct dc_link *link); +bool dc_link_dp_allow_hpd_rx_irq(const struct dc_link *link); struct dc_sink_init_data; From e980e1d978e0eb4c0399cff37f175779237db53b Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Fri, 23 Jul 2021 11:50:28 +0800 Subject: [PATCH 0796/1056] drm/amd/display: Fork thread to offload work of hpd_rx_irq [ Upstream commit 8e794421bc981586d0af4e959ec76d668c793a55 ] [Why] Currently, we will try to get dm.dc_lock in handle_hpd_rx_irq() when link lost happened, which is risky and could cause deadlock. e.g. If we are under procedure to enable MST streams and then monitor happens to toggle short hpd to notify link lost, then handle_hpd_rx_irq() will get blocked due to stream enabling flow has dc_lock. However, under MST, enabling streams involves communication with remote sinks which need to use handle_hpd_rx_irq() to handle sideband messages. Thus, we have deadlock here. [How] Target is to have handle_hpd_rx_irq() finished as soon as possilble. Hence we can react to interrupt quickly. Besides, we should avoid to grabe dm.dc_lock within handle_hpd_rx_irq() to avoid deadlock situation. Firstly, revert patches which introduced to use dm.dc_lock in handle_hpd_rx_irq(): * commit ("drm/amd/display: NULL pointer error during ") * commit ("drm/amd/display: Only one display lights up while using MST") * commit ("drm/amd/display: take dc_lock in short pulse handler only") Instead, create work to handle irq events which needs dm.dc_lock. Besides: * Create struct hpd_rx_irq_offload_work_queue for each link to handle its short hpd events * Avoid to handle link lost/ automated test if the link is disconnected * Defer dc_lock needed works in dc_link_handle_hpd_rx_irq(). This function should just handle simple stuff for us (e.g. DPCD R/W). However, deferred works should still be handled by the order that dc_link_handle_hpd_rx_irq() used to be. * Change function name dm_handle_hpd_rx_irq() to dm_handle_mst_sideband_msg() to be more specific Reviewed-by: Nicholas Kazlauskas Acked-by: Mikita Lipski Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 203 ++++++++++++++---- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 49 ++++- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 9 +- drivers/gpu/drm/amd/display/dc/dc_link.h | 6 +- 4 files changed, 219 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 19048f0d83a479..ce647302a1ec36 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1236,6 +1236,83 @@ static void vblank_control_worker(struct work_struct *work) } #endif + +static void dm_handle_hpd_rx_offload_work(struct work_struct *work) +{ + struct hpd_rx_irq_offload_work *offload_work; + struct amdgpu_dm_connector *aconnector; + struct dc_link *dc_link; + struct amdgpu_device *adev; + enum dc_connection_type new_connection_type = dc_connection_none; + unsigned long flags; + + offload_work = container_of(work, struct hpd_rx_irq_offload_work, work); + aconnector = offload_work->offload_wq->aconnector; + + if (!aconnector) { + DRM_ERROR("Can't retrieve aconnector in hpd_rx_irq_offload_work"); + goto skip; + } + + adev = drm_to_adev(aconnector->base.dev); + dc_link = aconnector->dc_link; + + mutex_lock(&aconnector->hpd_lock); + if (!dc_link_detect_sink(dc_link, &new_connection_type)) + DRM_ERROR("KMS: Failed to detect connector\n"); + mutex_unlock(&aconnector->hpd_lock); + + if (new_connection_type == dc_connection_none) + goto skip; + + if (amdgpu_in_reset(adev)) + goto skip; + + mutex_lock(&adev->dm.dc_lock); + if (offload_work->data.bytes.device_service_irq.bits.AUTOMATED_TEST) + dc_link_dp_handle_automated_test(dc_link); + else if ((dc_link->connector_signal != SIGNAL_TYPE_EDP) && + hpd_rx_irq_check_link_loss_status(dc_link, &offload_work->data) && + dc_link_dp_allow_hpd_rx_irq(dc_link)) { + dc_link_dp_handle_link_loss(dc_link); + spin_lock_irqsave(&offload_work->offload_wq->offload_lock, flags); + offload_work->offload_wq->is_handling_link_loss = false; + spin_unlock_irqrestore(&offload_work->offload_wq->offload_lock, flags); + } + mutex_unlock(&adev->dm.dc_lock); + +skip: + kfree(offload_work); + +} + +static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct dc *dc) +{ + int max_caps = dc->caps.max_links; + int i = 0; + struct hpd_rx_irq_offload_work_queue *hpd_rx_offload_wq = NULL; + + hpd_rx_offload_wq = kcalloc(max_caps, sizeof(*hpd_rx_offload_wq), GFP_KERNEL); + + if (!hpd_rx_offload_wq) + return NULL; + + + for (i = 0; i < max_caps; i++) { + hpd_rx_offload_wq[i].wq = + create_singlethread_workqueue("amdgpu_dm_hpd_rx_offload_wq"); + + if (hpd_rx_offload_wq[i].wq == NULL) { + DRM_ERROR("create amdgpu_dm_hpd_rx_offload_wq fail!"); + return NULL; + } + + spin_lock_init(&hpd_rx_offload_wq[i].offload_lock); + } + + return hpd_rx_offload_wq; +} + static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; @@ -1362,6 +1439,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) dc_hardware_init(adev->dm.dc); + adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev->dm.dc); + if (!adev->dm.hpd_rx_offload_wq) { + DRM_ERROR("amdgpu: failed to create hpd rx offload workqueue.\n"); + goto error; + } + #if defined(CONFIG_DRM_AMD_DC_DCN) if ((adev->flags & AMD_IS_APU) && (adev->asic_type >= CHIP_CARRIZO)) { struct dc_phy_addr_space_config pa_config; @@ -1541,6 +1624,18 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) adev->dm.freesync_module = NULL; } + if (adev->dm.hpd_rx_offload_wq) { + for (i = 0; i < adev->dm.dc->caps.max_links; i++) { + if (adev->dm.hpd_rx_offload_wq[i].wq) { + destroy_workqueue(adev->dm.hpd_rx_offload_wq[i].wq); + adev->dm.hpd_rx_offload_wq[i].wq = NULL; + } + } + + kfree(adev->dm.hpd_rx_offload_wq); + adev->dm.hpd_rx_offload_wq = NULL; + } + mutex_destroy(&adev->dm.audio_lock); mutex_destroy(&adev->dm.dc_lock); @@ -2160,6 +2255,16 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc) return res; } +static void hpd_rx_irq_work_suspend(struct amdgpu_display_manager *dm) +{ + int i; + + if (dm->hpd_rx_offload_wq) { + for (i = 0; i < dm->dc->caps.max_links; i++) + flush_workqueue(dm->hpd_rx_offload_wq[i].wq); + } +} + static int dm_suspend(void *handle) { struct amdgpu_device *adev = handle; @@ -2181,6 +2286,8 @@ static int dm_suspend(void *handle) amdgpu_dm_irq_suspend(adev); + hpd_rx_irq_work_suspend(dm); + return ret; } @@ -2191,6 +2298,8 @@ static int dm_suspend(void *handle) amdgpu_dm_irq_suspend(adev); + hpd_rx_irq_work_suspend(dm); + dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); return 0; @@ -2869,8 +2978,7 @@ static void handle_hpd_irq(void *param) } - -static void dm_handle_hpd_rx_irq(struct amdgpu_dm_connector *aconnector) +static void dm_handle_mst_sideband_msg(struct amdgpu_dm_connector *aconnector) { uint8_t esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 }; uint8_t dret; @@ -2948,6 +3056,25 @@ static void dm_handle_hpd_rx_irq(struct amdgpu_dm_connector *aconnector) DRM_DEBUG_DRIVER("Loop exceeded max iterations\n"); } +static void schedule_hpd_rx_offload_work(struct hpd_rx_irq_offload_work_queue *offload_wq, + union hpd_irq_data hpd_irq_data) +{ + struct hpd_rx_irq_offload_work *offload_work = + kzalloc(sizeof(*offload_work), GFP_KERNEL); + + if (!offload_work) { + DRM_ERROR("Failed to allocate hpd_rx_irq_offload_work.\n"); + return; + } + + INIT_WORK(&offload_work->work, dm_handle_hpd_rx_offload_work); + offload_work->data = hpd_irq_data; + offload_work->offload_wq = offload_wq; + + queue_work(offload_wq->wq, &offload_work->work); + DRM_DEBUG_KMS("queue work to handle hpd_rx offload work"); +} + static void handle_hpd_rx_irq(void *param) { struct amdgpu_dm_connector *aconnector = (struct amdgpu_dm_connector *)param; @@ -2959,14 +3086,16 @@ static void handle_hpd_rx_irq(void *param) enum dc_connection_type new_connection_type = dc_connection_none; struct amdgpu_device *adev = drm_to_adev(dev); union hpd_irq_data hpd_irq_data; - bool lock_flag = 0; + bool link_loss = false; + bool has_left_work = false; + int idx = aconnector->base.index; + struct hpd_rx_irq_offload_work_queue *offload_wq = &adev->dm.hpd_rx_offload_wq[idx]; memset(&hpd_irq_data, 0, sizeof(hpd_irq_data)); if (adev->dm.disable_hpd_irq) return; - /* * TODO:Temporary add mutex to protect hpd interrupt not have a gpio * conflict, after implement i2c helper, this mutex should be @@ -2974,43 +3103,41 @@ static void handle_hpd_rx_irq(void *param) */ mutex_lock(&aconnector->hpd_lock); - read_hpd_rx_irq_data(dc_link, &hpd_irq_data); + result = dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, + &link_loss, true, &has_left_work); - if ((dc_link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) || - (dc_link->type == dc_connection_mst_branch)) { - if (hpd_irq_data.bytes.device_service_irq.bits.UP_REQ_MSG_RDY) { - result = true; - dm_handle_hpd_rx_irq(aconnector); - goto out; - } else if (hpd_irq_data.bytes.device_service_irq.bits.DOWN_REP_MSG_RDY) { - result = false; - dm_handle_hpd_rx_irq(aconnector); + if (!has_left_work) + goto out; + + if (hpd_irq_data.bytes.device_service_irq.bits.AUTOMATED_TEST) { + schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data); + goto out; + } + + if (dc_link_dp_allow_hpd_rx_irq(dc_link)) { + if (hpd_irq_data.bytes.device_service_irq.bits.UP_REQ_MSG_RDY || + hpd_irq_data.bytes.device_service_irq.bits.DOWN_REP_MSG_RDY) { + dm_handle_mst_sideband_msg(aconnector); goto out; } - } - /* - * TODO: We need the lock to avoid touching DC state while it's being - * modified during automated compliance testing, or when link loss - * happens. While this should be split into subhandlers and proper - * interfaces to avoid having to conditionally lock like this in the - * outer layer, we need this workaround temporarily to allow MST - * lightup in some scenarios to avoid timeout. - */ - if (!amdgpu_in_reset(adev) && - (hpd_rx_irq_check_link_loss_status(dc_link, &hpd_irq_data) || - hpd_irq_data.bytes.device_service_irq.bits.AUTOMATED_TEST)) { - mutex_lock(&adev->dm.dc_lock); - lock_flag = 1; - } + if (link_loss) { + bool skip = false; -#ifdef CONFIG_DRM_AMD_DC_HDCP - result = dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, NULL); -#else - result = dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL); -#endif - if (!amdgpu_in_reset(adev) && lock_flag) - mutex_unlock(&adev->dm.dc_lock); + spin_lock(&offload_wq->offload_lock); + skip = offload_wq->is_handling_link_loss; + + if (!skip) + offload_wq->is_handling_link_loss = true; + + spin_unlock(&offload_wq->offload_lock); + + if (!skip) + schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data); + + goto out; + } + } out: if (result && !is_mst_root_connector) { @@ -3095,6 +3222,10 @@ static void register_hpd_handlers(struct amdgpu_device *adev) amdgpu_dm_irq_register_interrupt(adev, &int_params, handle_hpd_rx_irq, (void *) aconnector); + + if (adev->dm.hpd_rx_offload_wq) + adev->dm.hpd_rx_offload_wq[connector->index].aconnector = + aconnector; } } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index da87ca77023d65..cd059af033b4c0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -171,6 +171,48 @@ struct dal_allocation { u64 gpu_addr; }; +/** + * struct hpd_rx_irq_offload_work_queue - Work queue to handle hpd_rx_irq + * offload work + */ +struct hpd_rx_irq_offload_work_queue { + /** + * @wq: workqueue structure to queue offload work. + */ + struct workqueue_struct *wq; + /** + * @offload_lock: To protect fields of offload work queue. + */ + spinlock_t offload_lock; + /** + * @is_handling_link_loss: Used to prevent inserting link loss event when + * we're handling link loss + */ + bool is_handling_link_loss; + /** + * @aconnector: The aconnector that this work queue is attached to + */ + struct amdgpu_dm_connector *aconnector; +}; + +/** + * struct hpd_rx_irq_offload_work - hpd_rx_irq offload work structure + */ +struct hpd_rx_irq_offload_work { + /** + * @work: offload work + */ + struct work_struct work; + /** + * @data: reference irq data which is used while handling offload work + */ + union hpd_irq_data data; + /** + * @offload_wq: offload work queue that this work is queued to + */ + struct hpd_rx_irq_offload_work_queue *offload_wq; +}; + /** * struct amdgpu_display_manager - Central amdgpu display manager device * @@ -461,7 +503,12 @@ struct amdgpu_display_manager { */ struct crc_rd_work *crc_rd_wrk; #endif - + /** + * @hpd_rx_offload_wq: + * + * Work queue to offload works of hpd_rx_irq + */ + struct hpd_rx_irq_offload_work_queue *hpd_rx_offload_wq; /** * @mst_encoders: * diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 9b6111eb9ca445..6d5dc5ab3d8c0d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2075,7 +2075,7 @@ static struct dc_link_settings get_max_link_cap(struct dc_link *link) return max_link_cap; } -enum dc_status read_hpd_rx_irq_data( +static enum dc_status read_hpd_rx_irq_data( struct dc_link *link, union hpd_irq_data *irq_data) { @@ -3257,7 +3257,7 @@ void dc_link_dp_handle_link_loss(struct dc_link *link) } } -static bool handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd_irq_dpcd_data, bool *out_link_loss, +bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd_irq_dpcd_data, bool *out_link_loss, bool defer_handling, bool *has_left_work) { union hpd_irq_data hpd_irq_dpcd_data = { { { {0} } } }; @@ -3379,11 +3379,6 @@ static bool handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd_ return status; } -bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd_irq_dpcd_data, bool *out_link_loss) -{ - return handle_hpd_rx_irq(link, out_hpd_irq_dpcd_data, out_link_loss, false, NULL); -} - /*query dpcd for version and mst cap addresses*/ bool is_mst_supported(struct dc_link *link) { diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 0efa2bc8639baf..9b7c32f7fd86ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -296,7 +296,8 @@ enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx); * false - no change in Downstream port status. No further action required * from DM. */ bool dc_link_handle_hpd_rx_irq(struct dc_link *dc_link, - union hpd_irq_data *hpd_irq_dpcd_data, bool *out_link_loss); + union hpd_irq_data *hpd_irq_dpcd_data, bool *out_link_loss, + bool defer_handling, bool *has_left_work); /* * On eDP links this function call will stall until T12 has elapsed. @@ -305,9 +306,6 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *dc_link, */ bool dc_link_wait_for_t12(struct dc_link *link); -enum dc_status read_hpd_rx_irq_data( - struct dc_link *link, - union hpd_irq_data *irq_data); void dc_link_dp_handle_automated_test(struct dc_link *link); void dc_link_dp_handle_link_loss(struct dc_link *link); bool dc_link_dp_allow_hpd_rx_irq(const struct dc_link *link); From 8bae037b0fb429466fed1585440f337396d72efa Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 20 Oct 2021 16:45:00 -0400 Subject: [PATCH 0797/1056] drm/amdgpu/display: add quirk handling for stutter mode [ Upstream commit 3ce51649cdf23ab463494df2bd6d1e9529ebdc6a ] Stutter mode is a power saving feature on GPUs, however at least one early raven system exhibits stability issues with it. Add a quirk to disable it for that system. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=214417 Fixes: 005440066f929b ("drm/amdgpu: enable gfxoff again on raven series (v2)") Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ce647302a1ec36..873cb00519520d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1313,6 +1313,37 @@ static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct return hpd_rx_offload_wq; } +struct amdgpu_stutter_quirk { + u16 chip_vendor; + u16 chip_device; + u16 subsys_vendor; + u16 subsys_device; + u8 revision; +}; + +static const struct amdgpu_stutter_quirk amdgpu_stutter_quirk_list[] = { + /* https://bugzilla.kernel.org/show_bug.cgi?id=214417 */ + { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, + { 0, 0, 0, 0, 0 }, +}; + +static bool dm_should_disable_stutter(struct pci_dev *pdev) +{ + const struct amdgpu_stutter_quirk *p = amdgpu_stutter_quirk_list; + + while (p && p->chip_device != 0) { + if (pdev->vendor == p->chip_vendor && + pdev->device == p->chip_device && + pdev->subsystem_vendor == p->subsys_vendor && + pdev->subsystem_device == p->subsys_device && + pdev->revision == p->revision) { + return true; + } + ++p; + } + return false; +} + static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; @@ -1421,6 +1452,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (adev->asic_type != CHIP_CARRIZO && adev->asic_type != CHIP_STONEY) adev->dm.dc->debug.disable_stutter = amdgpu_pp_feature_mask & PP_STUTTER_MODE ? false : true; + if (dm_should_disable_stutter(adev->pdev)) + adev->dm.dc->debug.disable_stutter = true; if (amdgpu_dc_debug_mask & DC_DISABLE_STUTTER) adev->dm.dc->debug.disable_stutter = true; From c7720f23bb93628f30f16d033eaefbe2540f1b5e Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Wed, 6 Jul 2022 15:52:46 -0400 Subject: [PATCH 0798/1056] drm/amd/display: Ignore First MST Sideband Message Return Error [ Upstream commit acea108fa067d140bd155161a79b1fcd967f4137 ] [why] First MST sideband message returns AUX_RET_ERROR_HPD_DISCON on certain intel platform. Aux transaction considered failure if HPD unexpected pulled low. The actual aux transaction success in such case, hence do not return error. [how] Not returning error when AUX_RET_ERROR_HPD_DISCON detected on the first sideband message. v2: squash in additional DMI entries v3: squash in static fix Signed-off-by: Fangzhi Zuo Acked-by: Solomon Chiu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 39 +++++++++++++++++++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 8 ++++ .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 17 ++++++++ 3 files changed, 64 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 873cb00519520d..7150afacbc4f51 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include @@ -1344,6 +1345,41 @@ static bool dm_should_disable_stutter(struct pci_dev *pdev) return false; } +static const struct dmi_system_id hpd_disconnect_quirk_table[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), + }, + }, + {} +}; + +static void retrieve_dmi_info(struct amdgpu_display_manager *dm) +{ + const struct dmi_system_id *dmi_id; + + dm->aux_hpd_discon_quirk = false; + + dmi_id = dmi_first_match(hpd_disconnect_quirk_table); + if (dmi_id) { + dm->aux_hpd_discon_quirk = true; + DRM_INFO("aux_hpd_discon_quirk attached\n"); + } +} + static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; @@ -1435,6 +1471,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.flags.power_down_display_on_boot = true; INIT_LIST_HEAD(&adev->dm.da_list); + + retrieve_dmi_info(&adev->dm); + /* Display Core create. */ adev->dm.dc = dc_create(&init_data); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index cd059af033b4c0..f9c3e5a417138f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -539,6 +539,14 @@ struct amdgpu_display_manager { * last successfully applied backlight values. */ u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP]; + + /** + * @aux_hpd_discon_quirk: + * + * quirk for hpd discon while aux is on-going. + * occurred on certain intel platform + */ + bool aux_hpd_discon_quirk; }; enum dsc_clock_force_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 74885ff77f96a5..652cf108b3c2a7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -55,6 +55,8 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, ssize_t result = 0; struct aux_payload payload; enum aux_return_code_type operation_result; + struct amdgpu_device *adev; + struct ddc_service *ddc; if (WARN_ON(msg->size > 16)) return -E2BIG; @@ -71,6 +73,21 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload, &operation_result); + /* + * w/a on certain intel platform where hpd is unexpected to pull low during + * 1st sideband message transaction by return AUX_RET_ERROR_HPD_DISCON + * aux transaction is succuess in such case, therefore bypass the error + */ + ddc = TO_DM_AUX(aux)->ddc_service; + adev = ddc->ctx->driver_context; + if (adev->dm.aux_hpd_discon_quirk) { + if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE && + operation_result == AUX_RET_ERROR_HPD_DISCON) { + result = 0; + operation_result = AUX_RET_SUCCESS; + } + } + if (payload.write && result >= 0) result = msg->size; From 04ae8518745ce4ebb0acbe1dbcbdc18917841048 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 6 Jul 2022 20:59:42 +0800 Subject: [PATCH 0799/1056] scsi: megaraid: Clear READ queue map's nr_queues [ Upstream commit 8312cd3a7b835ae3033a679e5f0014a40e7891c5 ] The megaraid SCSI driver sets set->nr_maps as 3 if poll_queues is > 0, and blk-mq actually initializes each map's nr_queues as nr_hw_queues. Consequently the driver has to clear READ queue map's nr_queues, otherwise the queue map becomes broken if poll_queues is set as non-zero. Link: https://lore.kernel.org/r/20220706125942.528533-1-ming.lei@redhat.com Fixes: 9e4bec5b2a23 ("scsi: megaraid_sas: mq_poll support") Cc: Kashyap Desai Cc: sumit.saxena@broadcom.com Cc: chandrakanth.patil@broadcom.com Cc: linux-block@vger.kernel.org Cc: Hannes Reinecke Reported-by: Guangwu Zhang Tested-by: Guangwu Zhang Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index bb3f78013a13bf..88e164e3d2eaca 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3196,6 +3196,9 @@ static int megasas_map_queues(struct Scsi_Host *shost) qoff += map->nr_queues; offset += map->nr_queues; + /* we never use READ queue, so can't cheat blk-mq */ + shost->tag_set.map[HCTX_TYPE_READ].nr_queues = 0; + /* Setup Poll hctx */ map = &shost->tag_set.map[HCTX_TYPE_POLL]; map->nr_queues = instance->iopoll_q_count; From dbedad96f416495b446dbb4348856a4a6fa52b85 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 8 Jul 2022 17:00:27 -0700 Subject: [PATCH 0800/1056] scsi: ufs: core: Drop loglevel of WriteBoost message [ Upstream commit 2ae57c995003a7840cb6b5ec5f0c06193695321b ] Commit '3b5f3c0d0548 ("scsi: ufs: core: Tidy up WB configuration code")' changed the log level of the write boost enable/disable notification from debug to info. This results in a lot of noise in the kernel log during normal operation. Drop it back to debug level to avoid this. Link: https://lore.kernel.org/r/20220709000027.3929970-1-bjorn.andersson@linaro.org Fixes: 3b5f3c0d0548 ("scsi: ufs: core: Tidy up WB configuration code") Reviewed-by: Alim Akhtar Acked-by: Bean Huo Signed-off-by: Bjorn Andersson Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 5c9a31f18b7f08..4a7248421bcd0d 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5638,7 +5638,7 @@ int ufshcd_wb_toggle(struct ufs_hba *hba, bool enable) } hba->dev_info.wb_enabled = enable; - dev_info(hba->dev, "%s Write Booster %s\n", + dev_dbg(hba->dev, "%s Write Booster %s\n", __func__, enable ? "enabled" : "disabled"); return ret; From 321abf90c5768093f275e93c09b75c2f670f64f4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Feb 2022 17:46:50 +0100 Subject: [PATCH 0801/1056] nvme: check for duplicate identifiers earlier [ Upstream commit e2d77d2e11c4f1e70a1a24cc8fe63ff3dc9b53ef ] Lift the check for duplicate identifiers into nvme_init_ns_head, which avoids pointless error unwinding in case they don't match, and also matches where we check identifier validity for the multipath case. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 29b56ea01132f4..8fca84d4444677 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3686,13 +3686,6 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head->ids = *ids; kref_init(&head->ref); - ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, &head->ids); - if (ret) { - dev_err(ctrl->device, - "duplicate IDs for nsid %d\n", nsid); - goto out_cleanup_srcu; - } - if (head->ids.csi) { ret = nvme_get_effects_log(ctrl, head->ids.csi, &head->effects); if (ret) @@ -3731,6 +3724,12 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, mutex_lock(&ctrl->subsys->lock); head = nvme_find_ns_head(ctrl, nsid); if (!head) { + ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, ids); + if (ret) { + dev_err(ctrl->device, + "duplicate IDs for nsid %d\n", nsid); + goto out_unlock; + } head = nvme_alloc_ns_head(ctrl, nsid, ids); if (IS_ERR(head)) { ret = PTR_ERR(head); From 98d81b2b154d1aea706057939fe6c1b6b57c3b55 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Thu, 14 Jul 2022 12:42:10 +0000 Subject: [PATCH 0802/1056] nvme: fix block device naming collision [ Upstream commit 6961b5e02876b3b47f030a1f1ee8fd3e631ac270 ] The issue exists when multipath is enabled and the namespace is shared, but all the other controller checks at nvme_is_unique_nsid() are false. The reason for this issue is that nvme_is_unique_nsid() returns false when is called from nvme_mpath_alloc_disk() due to an uninitialized value of head->shared. The patch fixes it by setting head->shared before nvme_mpath_alloc_disk() is called. Fixes: 5974ea7ce0f9 ("nvme: allow duplicate NSIDs for private namespaces") Signed-off-by: Israel Rukshin Reviewed-by: Keith Busch Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8fca84d4444677..0c9cdbaf5cd635 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3660,7 +3660,7 @@ static int nvme_add_ns_cdev(struct nvme_ns *ns) } static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, - unsigned nsid, struct nvme_ns_ids *ids) + unsigned nsid, struct nvme_ns_ids *ids, bool is_shared) { struct nvme_ns_head *head; size_t size = sizeof(*head); @@ -3684,6 +3684,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head->subsys = ctrl->subsys; head->ns_id = nsid; head->ids = *ids; + head->shared = is_shared; kref_init(&head->ref); if (head->ids.csi) { @@ -3730,12 +3731,11 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, "duplicate IDs for nsid %d\n", nsid); goto out_unlock; } - head = nvme_alloc_ns_head(ctrl, nsid, ids); + head = nvme_alloc_ns_head(ctrl, nsid, ids, is_shared); if (IS_ERR(head)) { ret = PTR_ERR(head); goto out_unlock; } - head->shared = is_shared; } else { ret = -EINVAL; if (!is_shared || !head->shared) { From 63aa107bce83eb1265336a80309dcbe3a128a924 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 8 May 2022 10:09:05 +0300 Subject: [PATCH 0803/1056] e1000e: Enable GPT clock before sending message to CSME [ Upstream commit b49feacbeffc7635cc6692cbcc6a1eae2c17da6f ] On corporate (CSME) ADL systems, the Ethernet Controller may stop working ("HW unit hang") after exiting from the s0ix state. The reason is that CSME misses the message sent by the host. Enabling the dynamic GPT clock solves this problem. This clock is cleared upon HW initialization. Fixes: 3e55d231716e ("e1000e: Add handshake with the CSME to support S0ix") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=214821 Reviewed-by: Dima Ruinskiy Signed-off-by: Sasha Neftin Tested-by: Chia-Lin Kao (AceLan) Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/e1000e/netdev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index ce48e630fe5502..0fba6ccecf127f 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6499,6 +6499,10 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID && hw->mac.type >= e1000_pch_adp) { + /* Keep the GPT clock enabled for CSME */ + mac_data = er32(FEXTNVM); + mac_data |= BIT(3); + ew32(FEXTNVM, mac_data); /* Request ME unconfigure the device from S0ix */ mac_data = er32(H2ME); mac_data &= ~E1000_H2ME_START_DPG; From a4f7a9fbe24b3e3f7b78c9c9b4cc9458ea172745 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Mon, 9 May 2022 11:52:54 +0300 Subject: [PATCH 0804/1056] Revert "e1000e: Fix possible HW unit hang after an s0ix exit" [ Upstream commit 6cfa45361d3eac31ba67d7d0bbef547151450106 ] This reverts commit 1866aa0d0d6492bc2f8d22d0df49abaccf50cddd. Commit 1866aa0d0d64 ("e1000e: Fix possible HW unit hang after an s0ix exit") was a workaround for CSME problem to handle messages comes via H2ME mailbox. This problem has been fixed by patch "e1000e: Enable the GPT clock before sending message to the CSME". Fixes: 3e55d231716e ("e1000e: Add handshake with the CSME to support S0ix") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=214821 Signed-off-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/e1000e/hw.h | 1 - drivers/net/ethernet/intel/e1000e/ich8lan.c | 4 ---- drivers/net/ethernet/intel/e1000e/ich8lan.h | 1 - drivers/net/ethernet/intel/e1000e/netdev.c | 26 --------------------- 4 files changed, 32 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index 13382df2f2eff4..bcf680e838113d 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -630,7 +630,6 @@ struct e1000_phy_info { bool disable_polarity_correction; bool is_mdix; bool polarity_correction; - bool reset_disable; bool speed_downgraded; bool autoneg_wait_to_complete; }; diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index e6c8e6d5234f81..9466f65a6da774 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -2050,10 +2050,6 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) bool blocked = false; int i = 0; - /* Check the PHY (LCD) reset flag */ - if (hw->phy.reset_disable) - return true; - while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) && (i++ < 30)) usleep_range(10000, 11000); diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 638a3ddd7ada8b..2504b11c3169fa 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -271,7 +271,6 @@ #define I217_CGFREG_ENABLE_MTA_RESET 0x0002 #define I217_MEMPWR PHY_REG(772, 26) #define I217_MEMPWR_DISABLE_SMB_RELEASE 0x0010 -#define I217_MEMPWR_MOEM 0x1000 /* Receive Address Initial CRC Calculation */ #define E1000_PCH_RAICC(_n) (0x05F50 + ((_n) * 4)) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 0fba6ccecf127f..407bbb4cc236f3 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6996,21 +6996,8 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev) struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); - struct e1000_hw *hw = &adapter->hw; - u16 phy_data; int rc; - if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID && - hw->mac.type >= e1000_pch_adp) { - /* Mask OEM Bits / Gig Disable / Restart AN (772_26[12] = 1) */ - e1e_rphy(hw, I217_MEMPWR, &phy_data); - phy_data |= I217_MEMPWR_MOEM; - e1e_wphy(hw, I217_MEMPWR, phy_data); - - /* Disable LCD reset */ - hw->phy.reset_disable = true; - } - e1000e_flush_lpic(pdev); e1000e_pm_freeze(dev); @@ -7032,8 +7019,6 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev) struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); - struct e1000_hw *hw = &adapter->hw; - u16 phy_data; int rc; /* Introduce S0ix implementation */ @@ -7044,17 +7029,6 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev) if (rc) return rc; - if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID && - hw->mac.type >= e1000_pch_adp) { - /* Unmask OEM Bits / Gig Disable / Restart AN 772_26[12] = 0 */ - e1e_rphy(hw, I217_MEMPWR, &phy_data); - phy_data &= ~I217_MEMPWR_MOEM; - e1e_wphy(hw, I217_MEMPWR, phy_data); - - /* Enable LCD reset */ - hw->phy.reset_disable = false; - } - return e1000e_pm_thaw(dev); } From e75b73081f1ec169518773626c2ff3950476660b Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 2 Jun 2022 18:58:11 +0300 Subject: [PATCH 0805/1056] igc: Reinstate IGC_REMOVED logic and implement it properly [ Upstream commit 7c1ddcee5311f3315096217881d2dbe47cc683f9 ] The initially merged version of the igc driver code (via commit 146740f9abc4, "igc: Add support for PF") contained the following IGC_REMOVED checks in the igc_rd32/wr32() MMIO accessors: u32 igc_rd32(struct igc_hw *hw, u32 reg) { u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); u32 value = 0; if (IGC_REMOVED(hw_addr)) return ~value; value = readl(&hw_addr[reg]); /* reads should not return all F's */ if (!(~value) && (!reg || !(~readl(hw_addr)))) hw->hw_addr = NULL; return value; } And: #define wr32(reg, val) \ do { \ u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \ if (!IGC_REMOVED(hw_addr)) \ writel((val), &hw_addr[(reg)]); \ } while (0) E.g. igb has similar checks in its MMIO accessors, and has a similar macro E1000_REMOVED, which is implemented as follows: #define E1000_REMOVED(h) unlikely(!(h)) These checks serve to detect and take note of an 0xffffffff MMIO read return from the device, which can be caused by a PCIe link flap or some other kind of PCI bus error, and to avoid performing MMIO reads and writes from that point onwards. However, the IGC_REMOVED macro was not originally implemented: #ifndef IGC_REMOVED #define IGC_REMOVED(a) (0) #endif /* IGC_REMOVED */ This led to the IGC_REMOVED logic to be removed entirely in a subsequent commit (commit 3c215fb18e70, "igc: remove IGC_REMOVED function"), with the rationale that such checks matter only for virtualization and that igc does not support virtualization -- but a PCIe device can become detached even without virtualization being in use, and without proper checks, a PCIe bus error affecting an igc adapter will lead to various NULL pointer dereferences, as the first access after the error will set hw->hw_addr to NULL, and subsequent accesses will blindly dereference this now-NULL pointer. This patch reinstates the IGC_REMOVED checks in igc_rd32/wr32(), and implements IGC_REMOVED the way it is done for igb, by checking for the unlikely() case of hw_addr being NULL. This change prevents the oopses seen when a PCIe link flap occurs on an igc adapter. Fixes: 146740f9abc4 ("igc: Add support for PF") Signed-off-by: Lennert Buytenhek Tested-by: Naama Meir Acked-by: Sasha Neftin Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_main.c | 3 +++ drivers/net/ethernet/intel/igc/igc_regs.h | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index f99819fc559d9b..2a84f57ea68b48 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6159,6 +6159,9 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); u32 value = 0; + if (IGC_REMOVED(hw_addr)) + return ~value; + value = readl(&hw_addr[reg]); /* reads should not return all F's */ diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index e197a33d93a030..026c3b65fc37aa 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -306,7 +306,8 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg); #define wr32(reg, val) \ do { \ u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \ - writel((val), &hw_addr[(reg)]); \ + if (!IGC_REMOVED(hw_addr)) \ + writel((val), &hw_addr[(reg)]); \ } while (0) #define rd32(reg) (igc_rd32(hw, reg)) @@ -318,4 +319,6 @@ do { \ #define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2))) +#define IGC_REMOVED(h) unlikely(!(h)) + #endif From 01083e3f9868a96e678496136cef195a89e292f9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 13:51:52 -0700 Subject: [PATCH 0806/1056] ip: Fix data-races around sysctl_ip_no_pmtu_disc. [ Upstream commit 0968d2a441bf6afb551fd99e60fa65ed67068963 ] While reading sysctl_ip_no_pmtu_disc, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/af_inet.c | 2 +- net/ipv4/icmp.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/xfrm/xfrm_state.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 44f21278003d17..781c595f6880d5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -338,7 +338,7 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, inet->hdrincl = 1; } - if (net->ipv4.sysctl_ip_no_pmtu_disc) + if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a5cc89506c1e49..609c4ff7edc694 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -887,7 +887,7 @@ static bool icmp_unreach(struct sk_buff *skb) * values please see * Documentation/networking/ip-sysctl.rst */ - switch (net->ipv4.sysctl_ip_no_pmtu_disc) { + switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) { default: net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n", &iph->daddr); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index dab4a047590b73..3a91d0d40aecc1 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -226,7 +226,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, RCU_INIT_POINTER(inet->mc_list, NULL); inet->rcv_tos = 0; - if (net->ipv4.sysctl_ip_no_pmtu_disc) + if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f7bfa19169688d..b1a04a22166f76 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2619,7 +2619,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) int err; if (family == AF_INET && - xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc) + READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)) x->props.flags |= XFRM_STATE_NOPMTUDISC; err = -EPROTONOSUPPORT; From 93fbc06da1d819f3981a7bd7928c3641ea67b364 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 13:51:53 -0700 Subject: [PATCH 0807/1056] ip: Fix data-races around sysctl_ip_fwd_use_pmtu. [ Upstream commit 60c158dc7b1f0558f6cadd5b50d0386da0000d50 ] While reading sysctl_ip_fwd_use_pmtu, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: f87c10a8aa1e ("ipv4: introduce ip_dst_mtu_maybe_forward and protect forwarding path against pmtu spoofing") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/ip.h | 2 +- net/ipv4/route.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index a77a9e1c6c047f..c69dd114f367ba 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -441,7 +441,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, struct net *net = dev_net(dst->dev); unsigned int mtu; - if (net->ipv4.sysctl_ip_fwd_use_pmtu || + if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { mtu = rt->rt_pmtu; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1db2fda2283066..7f08a30256c52b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1404,7 +1404,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) struct fib_info *fi = res->fi; u32 mtu = 0; - if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu || + if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) || fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU)) mtu = fi->fib_mtu; From 351f81f7d7185d18a9ff76f8f8c2fa8c4eea563b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 13:51:54 -0700 Subject: [PATCH 0808/1056] ip: Fix data-races around sysctl_ip_fwd_update_priority. [ Upstream commit 7bf9e18d9a5e99e3c83482973557e9f047b051e7 ] While reading sysctl_ip_fwd_update_priority, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 432e05d32892 ("net: ipv4: Control SKB reprioritization after forwarding") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 ++- net/ipv4/ip_forward.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 6ef4ca8599acd6..d17156c11ef804 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -9787,13 +9787,14 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp) static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { struct net *net = mlxsw_sp_net(mlxsw_sp); - bool usp = net->ipv4.sysctl_ip_fwd_update_priority; char rgcr_pl[MLXSW_REG_RGCR_LEN]; u64 max_rifs; + bool usp; if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS)) return -EIO; max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority); mlxsw_reg_rgcr_pack(rgcr_pl, true, true); mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 00ec819f949b5e..29730edda220af 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -151,7 +151,7 @@ int ip_forward(struct sk_buff *skb) !skb_sec_path(skb)) ip_rt_send_redirect(skb); - if (net->ipv4.sysctl_ip_fwd_update_priority) + if (READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority)) skb->priority = rt_tos2priority(iph->tos); return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, From 4fdf9bdb28835566d5648cd4c0200d37e4efa791 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 13:51:55 -0700 Subject: [PATCH 0809/1056] ip: Fix data-races around sysctl_ip_nonlocal_bind. [ Upstream commit 289d3b21fb0bfc94c4e98f10635bba1824e5f83c ] While reading sysctl_ip_nonlocal_bind, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/inet_sock.h | 2 +- net/sctp/protocol.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index d81b7f85819ed1..defd77baf74a64 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -373,7 +373,7 @@ static inline bool inet_get_convert_csum(struct sock *sk) static inline bool inet_can_nonlocal_bind(struct net *net, struct inet_sock *inet) { - return net->ipv4.sysctl_ip_nonlocal_bind || + return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) || inet->freebind || inet->transparent; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index ec0f52567c16ff..9987decdead219 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -359,7 +359,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) && ret != RTN_LOCAL && !sp->inet.freebind && - !net->ipv4.sysctl_ip_nonlocal_bind) + !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind)) return 0; if (ipv6_only_sock(sctp_opt2sk(sp))) From 87ceaa199a72c5856d49a030941fabcd5c3928d4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 13:51:56 -0700 Subject: [PATCH 0810/1056] ip: Fix a data-race around sysctl_ip_autobind_reuse. [ Upstream commit 0db232765887d9807df8bcb7b6f29b2871539eab ] While reading sysctl_ip_autobind_reuse, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 4b01a9674231 ("tcp: bind(0) remove the SO_REUSEADDR restriction when ephemeral ports are exhausted.") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/inet_connection_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 62a67fdc344cd2..d3bbb344bbe12b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -259,7 +259,7 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int * goto other_half_scan; } - if (net->ipv4.sysctl_ip_autobind_reuse && !relax) { + if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) { /* We still have a chance to connect to different destinations */ relax = true; goto ports_exhausted; From dccf8a67f30e18980d13f07006e5a536bbd1e136 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 13:51:57 -0700 Subject: [PATCH 0811/1056] ip: Fix a data-race around sysctl_fwmark_reflect. [ Upstream commit 85d0b4dbd74b95cc492b1f4e34497d3f894f5d9a ] While reading sysctl_fwmark_reflect, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: e110861f8609 ("net: add a sysctl to reflect the fwmark on replies") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/ip.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/ip.h b/include/net/ip.h index c69dd114f367ba..a0ac57af82dcfd 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -379,7 +379,7 @@ void ipfrag_init(void); void ip_static_sysctl_init(void); #define IP4_REPLY_MARK(net, mark) \ - ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) + (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0) static inline bool ip_is_fragment(const struct iphdr *iph) { From bf3134feffe61b7a0e21f60a04743f8da0958b53 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 13:51:58 -0700 Subject: [PATCH 0812/1056] tcp/dccp: Fix a data-race around sysctl_tcp_fwmark_accept. [ Upstream commit 1a0008f9df59451d0a17806c1ee1a19857032fa8 ] While reading sysctl_tcp_fwmark_accept, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 84f39b08d786 ("net: support marking accepting TCP sockets") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/inet_sock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index defd77baf74a64..33344d54ad656c 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) { - if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) + if (!sk->sk_mark && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) return skb->mark; return sk->sk_mark; From b8e29f64458318efbbdf3fb9820a2722cbd509be Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 May 2022 11:55:43 -0700 Subject: [PATCH 0813/1056] tcp: sk->sk_bound_dev_if once in inet_request_bound_dev_if() [ Upstream commit fdb5fd7f736ec7ae9fb36d2842ea6d9ebc4e7269 ] inet_request_bound_dev_if() reads sk->sk_bound_dev_if twice while listener socket is not locked. Another cpu could change this field under us. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/inet_sock.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 33344d54ad656c..e71827aa2dfb16 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -117,14 +117,15 @@ static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) static inline int inet_request_bound_dev_if(const struct sock *sk, struct sk_buff *skb) { + int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); - if (!sk->sk_bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) + if (!bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) return l3mdev_master_ifindex_by_index(net, skb->skb_iif); #endif - return sk->sk_bound_dev_if; + return bound_dev_if; } static inline int inet_sk_bound_l3mdev(const struct sock *sk) From 9ba9cd43b5776c27d25e5a32dde9e80bdeb1c6a1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 13:51:59 -0700 Subject: [PATCH 0814/1056] tcp: Fix data-races around sysctl_tcp_l3mdev_accept. [ Upstream commit 08a75f10679470552a3a443f9aefd1399604d31d ] While reading sysctl_tcp_l3mdev_accept, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 6dd9a14e92e5 ("net: Allow accepted sockets to be bound to l3mdev domain") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/inet_hashtables.h | 2 +- include/net/inet_sock.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 98e1ec1a14f038..749bb1e460871a 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -207,7 +207,7 @@ static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept, + return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index e71827aa2dfb16..c307a547d2cb41 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -121,7 +121,7 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); - if (!bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) + if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept)) return l3mdev_master_ifindex_by_index(net, skb->skb_iif); #endif @@ -133,7 +133,7 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk) #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); - if (!net->ipv4.sysctl_tcp_l3mdev_accept) + if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept)) return l3mdev_master_ifindex_by_index(net, sk->sk_bound_dev_if); #endif From aabe9438fdfe004e021d5a206227ec105dbe2416 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 13:52:00 -0700 Subject: [PATCH 0815/1056] tcp: Fix data-races around sysctl_tcp_mtu_probing. [ Upstream commit f47d00e077e7d61baf69e46dde3210c886360207 ] While reading sysctl_tcp_mtu_probing, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 5d424d5a674f ("[TCP]: MTU probing") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_output.c | 2 +- net/ipv4/tcp_timer.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0bd5c334ccceec..1acfd4298a015e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1765,7 +1765,7 @@ void tcp_mtup_init(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct net *net = sock_net(sk); - icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1; + icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1; icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + icsk->icsk_af_ops->net_header_len; icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 20cf4a98c69d85..98bb00e29e1e9b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -163,7 +163,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) int mss; /* Black hole detection */ - if (!net->ipv4.sysctl_tcp_mtu_probing) + if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing)) return; if (!icsk->icsk_mtup.enabled) { From 4d7dea651b7fe0322be95054f64e3711afccc543 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 13:52:01 -0700 Subject: [PATCH 0816/1056] tcp: Fix data-races around sysctl_tcp_base_mss. [ Upstream commit 88d78bc097cd8ebc6541e93316c9d9bf651b13e8 ] While reading sysctl_tcp_base_mss, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 5d424d5a674f ("[TCP]: MTU probing") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_output.c | 2 +- net/ipv4/tcp_timer.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1acfd4298a015e..53277a9a23001d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1768,7 +1768,7 @@ void tcp_mtup_init(struct sock *sk) icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1; icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + icsk->icsk_af_ops->net_header_len; - icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss)); icsk->icsk_mtup.probe_size = 0; if (icsk->icsk_mtup.enabled) icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 98bb00e29e1e9b..04063c7e33ba3a 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -171,7 +171,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } else { mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; - mss = min(net->ipv4.sysctl_tcp_base_mss, mss); + mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss); mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor); mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); From 0fc9357282df055e30990b29f4b7afa53ab42cdb Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 13:52:02 -0700 Subject: [PATCH 0817/1056] tcp: Fix data-races around sysctl_tcp_min_snd_mss. [ Upstream commit 78eb166cdefcc3221c8c7c1e2d514e91a2eb5014 ] While reading sysctl_tcp_min_snd_mss, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 5f3e2bf008c2 ("tcp: add tcp_min_snd_mss sysctl") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_output.c | 3 ++- net/ipv4/tcp_timer.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 53277a9a23001d..b4a8a5b9350faf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1722,7 +1722,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) mss_now -= icsk->icsk_ext_hdr_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ - mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss); + mss_now = max(mss_now, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss)); return mss_now; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 04063c7e33ba3a..39107bb730b07f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -173,7 +173,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss); mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor); - mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss); + mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss)); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); } tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); From cc36c37f5fe066c4708e623ead96dc8f57224bf5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 13:52:03 -0700 Subject: [PATCH 0818/1056] tcp: Fix a data-race around sysctl_tcp_mtu_probe_floor. [ Upstream commit 8e92d4423615a5257d0d871fc067aa561f597deb ] While reading sysctl_tcp_mtu_probe_floor, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: c04b79b6cfd7 ("tcp: add new tcp_mtu_probe_floor sysctl") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 39107bb730b07f..4f3b9ab222b6e0 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -172,7 +172,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) } else { mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss); - mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor); + mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor)); mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss)); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); } From f524c3e7f6cdad66b3b6a912cef47b656f8b0de3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 13:52:04 -0700 Subject: [PATCH 0819/1056] tcp: Fix a data-race around sysctl_tcp_probe_threshold. [ Upstream commit 92c0aa4175474483d6cf373314343d4e624e882a ] While reading sysctl_tcp_probe_threshold, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 6b58e0a5f32d ("ipv4: Use binary search to choose tcp PMTU probe_size") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b4a8a5b9350faf..0ba48c43c06f89 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2363,7 +2363,7 @@ static int tcp_mtu_probe(struct sock *sk) * probing process by not resetting search range to its orignal. */ if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) || - interval < net->ipv4.sysctl_tcp_probe_threshold) { + interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) { /* Check whether enough time has elaplased for * another round of probing. */ From e6b6f027e2854a51f345a5e3e808d7a88001d4f8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 13:52:05 -0700 Subject: [PATCH 0820/1056] tcp: Fix a data-race around sysctl_tcp_probe_interval. [ Upstream commit 2a85388f1d94a9f8b5a529118a2c5eaa0520d85c ] While reading sysctl_tcp_probe_interval, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 05cbc0db03e8 ("ipv4: Create probe timer for tcp PMTU as per RFC4821") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0ba48c43c06f89..3fa2bfbc250d4e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2281,7 +2281,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk) u32 interval; s32 delta; - interval = net->ipv4.sysctl_tcp_probe_interval; + interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval); delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp; if (unlikely(delta >= interval * HZ)) { int mss = tcp_current_mss(sk); From f784d232565b3f0bf7800968005378d3c7db8bd4 Mon Sep 17 00:00:00 2001 From: Biao Huang Date: Thu, 14 Jul 2022 14:00:13 +0800 Subject: [PATCH 0821/1056] net: stmmac: fix pm runtime issue in stmmac_dvr_remove() [ Upstream commit 0d9a15913b871e03fdd3b3d90a2e665fb22f9bcf ] If netif is running when stmmac_dvr_remove is invoked, the unregister_netdev will call ndo_stop(stmmac_release) and vlan_kill_rx_filter(stmmac_vlan_rx_kill_vid). Currently, stmmac_dvr_remove() will disable pm runtime before unregister_netdev. When stmmac_vlan_rx_kill_vid is invoked, pm_runtime_resume_and_get in it returns EACCESS error number, and reports: dwmac-mediatek 11021000.ethernet eth0: stmmac_dvr_remove: removing driver dwmac-mediatek 11021000.ethernet eth0: FPE workqueue stop dwmac-mediatek 11021000.ethernet eth0: failed to kill vid 0081/0 Move the pm_runtime_disable to the end of stmmac_dvr_remove to fix this issue. Fixes: 6449520391dfc ("net: stmmac: properly handle with runtime pm in stmmac_dvr_remove()") Signed-off-by: Biao Huang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 9c1e19ea6fcd01..95e1307cfda2d1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7279,8 +7279,6 @@ int stmmac_dvr_remove(struct device *dev) netdev_info(priv->dev, "%s: removing driver", __func__); pm_runtime_get_sync(dev); - pm_runtime_disable(dev); - pm_runtime_put_noidle(dev); stmmac_stop_all_dma(priv); stmmac_mac_set(priv, priv->ioaddr, false); @@ -7307,6 +7305,9 @@ int stmmac_dvr_remove(struct device *dev) mutex_destroy(&priv->lock); bitmap_free(priv->af_xdp_zc_qps); + pm_runtime_disable(dev); + pm_runtime_put_noidle(dev); + return 0; } EXPORT_SYMBOL_GPL(stmmac_dvr_remove); From f53c6ee5e7eb080611c33f02d788573068094999 Mon Sep 17 00:00:00 2001 From: Biao Huang Date: Thu, 14 Jul 2022 14:00:14 +0800 Subject: [PATCH 0822/1056] net: stmmac: fix unbalanced ptp clock issue in suspend/resume flow [ Upstream commit f4c7d8948e866918d61493264dbbd67e45ef2bda ] Current stmmac driver will prepare/enable ptp_ref clock in stmmac_init_tstamp_counter(). The stmmac_pltfr_noirq_suspend will disable it once in suspend flow. But in resume flow, stmmac_pltfr_noirq_resume --> stmmac_init_tstamp_counter stmmac_resume --> stmmac_hw_setup --> stmmac_init_ptp --> stmmac_init_tstamp_counter ptp_ref clock reference counter increases twice, which leads to unbalance ptp clock when resume back. Move ptp_ref clock prepare/enable out of stmmac_init_tstamp_counter to fix it. Fixes: 0735e639f129d ("net: stmmac: skip only stmmac_ptp_register when resume from suspend") Signed-off-by: Biao Huang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 17 ++++++++--------- .../ethernet/stmicro/stmmac/stmmac_platform.c | 8 +++++++- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 95e1307cfda2d1..b4f83c86556840 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -844,19 +844,10 @@ int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags) struct timespec64 now; u32 sec_inc = 0; u64 temp = 0; - int ret; if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) return -EOPNOTSUPP; - ret = clk_prepare_enable(priv->plat->clk_ptp_ref); - if (ret < 0) { - netdev_warn(priv->dev, - "failed to enable PTP reference clock: %pe\n", - ERR_PTR(ret)); - return ret; - } - stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags); priv->systime_flags = systime_flags; @@ -3325,6 +3316,14 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) stmmac_mmc_setup(priv); + if (ptp_register) { + ret = clk_prepare_enable(priv->plat->clk_ptp_ref); + if (ret < 0) + netdev_warn(priv->dev, + "failed to enable PTP reference clock: %pe\n", + ERR_PTR(ret)); + } + ret = stmmac_init_ptp(priv); if (ret == -EOPNOTSUPP) netdev_warn(priv->dev, "PTP not supported by HW\n"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 11e1055e8260f4..9f5cac4000da68 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -815,7 +815,13 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev) if (ret) return ret; - stmmac_init_tstamp_counter(priv, priv->systime_flags); + ret = clk_prepare_enable(priv->plat->clk_ptp_ref); + if (ret < 0) { + netdev_warn(priv->dev, + "failed to enable PTP reference clock: %pe\n", + ERR_PTR(ret)); + return ret; + } } return 0; From 0156b402a5be677e0a2c5bbf7c2bb15c7ab9a889 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Tue, 18 Jan 2022 10:54:33 +0100 Subject: [PATCH 0823/1056] mtd: rawnand: gpmi: validate controller clock rate [ Upstream commit 15e27d197a7ea69b4643791ca2f8467fdd998359 ] What to do when the real rate of the gpmi clock is not equal to the required one? The solutions proposed in [1] did not lead to a conclusion on how to validate the clock rate, so, inspired by the document [2], I consider the rate correct only if not lower or equal to the rate of the previous edo mode. In fact, in chapter 4.16.2 (NV-DDR) of the document [2], it is written that "If the host selects timing mode n, then its clock period shall be faster than the clock period of timing mode n-1 and slower than or equal to the clock period of timing mode n.". I thought that it could therefore also be used in this case, without therefore having to define the valid rate ranges empirically. For example, suppose that gpmi_nfc_compute_timings() is called to set edo mode 5 (100MHz) but the rate returned by clk_round_rate() is 80MHz (edo mode 4). In this case gpmi_nfc_compute_timings() will return error, and will be called again to set edo mode 4, which this time will be successful. [1] https://lore.kernel.org/r/20210702065350.209646-5-ebiggers@kernel.org [2] http://www.onfi.org/-/media/client/onfi/specs/onfi_3_0_gold.pdf?la=en Co-developed-by: Michael Trimarchi Signed-off-by: Michael Trimarchi Signed-off-by: Dario Binacchi Tested-by: Sascha Hauer Reviewed-by: Sascha Hauer Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220118095434.35081-4-dario.binacchi@amarulasolutions.com Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index b72b387c08ef78..62f4988c2a5f25 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -644,8 +644,8 @@ static int bch_set_geometry(struct gpmi_nand_data *this) * RDN_DELAY = ----------------------- {3} * RP */ -static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this, - const struct nand_sdr_timings *sdr) +static int gpmi_nfc_compute_timings(struct gpmi_nand_data *this, + const struct nand_sdr_timings *sdr) { struct gpmi_nfc_hardware_timing *hw = &this->hw; struct resources *r = &this->resources; @@ -657,23 +657,33 @@ static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this, int sample_delay_ps, sample_delay_factor; u16 busy_timeout_cycles; u8 wrn_dly_sel; + unsigned long clk_rate, min_rate; if (sdr->tRC_min >= 30000) { /* ONFI non-EDO modes [0-3] */ hw->clk_rate = 22000000; + min_rate = 0; wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_4_TO_8NS; } else if (sdr->tRC_min >= 25000) { /* ONFI EDO mode 4 */ hw->clk_rate = 80000000; + min_rate = 22000000; wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_NO_DELAY; } else { /* ONFI EDO mode 5 */ hw->clk_rate = 100000000; + min_rate = 80000000; wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_NO_DELAY; } - hw->clk_rate = clk_round_rate(r->clock[0], hw->clk_rate); + clk_rate = clk_round_rate(r->clock[0], hw->clk_rate); + if (clk_rate <= min_rate) { + dev_err(this->dev, "clock setting: expected %ld, got %ld\n", + hw->clk_rate, clk_rate); + return -ENOTSUPP; + } + hw->clk_rate = clk_rate; /* SDR core timings are given in picoseconds */ period_ps = div_u64((u64)NSEC_PER_SEC * 1000, hw->clk_rate); @@ -714,6 +724,7 @@ static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this, hw->ctrl1n |= BF_GPMI_CTRL1_RDN_DELAY(sample_delay_factor) | BM_GPMI_CTRL1_DLL_ENABLE | (use_half_period ? BM_GPMI_CTRL1_HALF_PERIOD : 0); + return 0; } static int gpmi_nfc_apply_timings(struct gpmi_nand_data *this) @@ -769,6 +780,7 @@ static int gpmi_setup_interface(struct nand_chip *chip, int chipnr, { struct gpmi_nand_data *this = nand_get_controller_data(chip); const struct nand_sdr_timings *sdr; + int ret; /* Retrieve required NAND timings */ sdr = nand_get_sdr_timings(conf); @@ -784,7 +796,9 @@ static int gpmi_setup_interface(struct nand_chip *chip, int chipnr, return 0; /* Do the actual derivation of the controller timings */ - gpmi_nfc_compute_timings(this, sdr); + ret = gpmi_nfc_compute_timings(this, sdr); + if (ret) + return ret; this->hw.must_apply_timings = true; From 212a5360ef4063f3ea3752e6e6a7e73e8e82acbd Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 1 Jul 2022 13:03:41 +0200 Subject: [PATCH 0824/1056] mtd: rawnand: gpmi: Set WAIT_FOR_READY timeout based on program/erase times MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0fddf9ad06fd9f439f137139861556671673e31c ] 06781a5026350 Fixes the calculation of the DEVICE_BUSY_TIMEOUT register value from busy_timeout_cycles. busy_timeout_cycles is calculated wrong though: It is calculated based on the maximum page read time, but the timeout is also used for page write and block erase operations which require orders of magnitude bigger timeouts. Fix this by calculating busy_timeout_cycles from the maximum of tBERS_max and tPROG_max. This is for now the easiest and most obvious way to fix the driver. There's room for improvements though: The NAND_OP_WAITRDY_INSTR tells us the desired timeout for the current operation, so we could program the timeout dynamically for each operation instead of setting a fixed timeout. Also we could wire up the interrupt handler to actually detect and forward timeouts occurred when waiting for the chip being ready. As a sidenote I verified that the change in 06781a5026350 is really correct. I wired up the interrupt handler in my tree and measured the time between starting the operation and the timeout interrupt handler coming in. The time increases 41us with each step in the timeout register which corresponds to 4096 clock cycles with the 99MHz clock that I have. Fixes: 06781a5026350 ("mtd: rawnand: gpmi: Fix setting busy timeout setting") Fixes: b1206122069aa ("mtd: rawniand: gpmi: use core timings instead of an empirical derivation") Cc: stable@vger.kernel.org Signed-off-by: Sascha Hauer Acked-by: Han Xu Tested-by: Tomasz Moń Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index 62f4988c2a5f25..aef722dfdef5f3 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -655,9 +655,10 @@ static int gpmi_nfc_compute_timings(struct gpmi_nand_data *this, unsigned int tRP_ps; bool use_half_period; int sample_delay_ps, sample_delay_factor; - u16 busy_timeout_cycles; + unsigned int busy_timeout_cycles; u8 wrn_dly_sel; unsigned long clk_rate, min_rate; + u64 busy_timeout_ps; if (sdr->tRC_min >= 30000) { /* ONFI non-EDO modes [0-3] */ @@ -690,7 +691,8 @@ static int gpmi_nfc_compute_timings(struct gpmi_nand_data *this, addr_setup_cycles = TO_CYCLES(sdr->tALS_min, period_ps); data_setup_cycles = TO_CYCLES(sdr->tDS_min, period_ps); data_hold_cycles = TO_CYCLES(sdr->tDH_min, period_ps); - busy_timeout_cycles = TO_CYCLES(sdr->tWB_max + sdr->tR_max, period_ps); + busy_timeout_ps = max(sdr->tBERS_max, sdr->tPROG_max); + busy_timeout_cycles = TO_CYCLES(busy_timeout_ps, period_ps); hw->timing0 = BF_GPMI_TIMING0_ADDRESS_SETUP(addr_setup_cycles) | BF_GPMI_TIMING0_DATA_HOLD(data_hold_cycles) | From 88ec2ff42da3ac93b2437dc52fe25cd4372148e6 Mon Sep 17 00:00:00 2001 From: Liang He Date: Thu, 14 Jul 2022 23:31:38 +0800 Subject: [PATCH 0825/1056] net: dsa: microchip: ksz_common: Fix refcount leak bug [ Upstream commit a14bd7475452c51835dd5a0cee4c8fa48dd0b539 ] In ksz_switch_register(), we should call of_node_put() for the reference returned by of_get_child_by_name() which has increased the refcount. Fixes: 912aae27c6af ("net: dsa: microchip: really look for phy-mode in port nodes") Signed-off-by: Liang He Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220714153138.375919-1-windhl@126.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/microchip/ksz_common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 7c2968a639eba5..4c4e6990c0ae9e 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -414,18 +414,21 @@ int ksz_switch_register(struct ksz_device *dev, ports = of_get_child_by_name(dev->dev->of_node, "ethernet-ports"); if (!ports) ports = of_get_child_by_name(dev->dev->of_node, "ports"); - if (ports) + if (ports) { for_each_available_child_of_node(ports, port) { if (of_property_read_u32(port, "reg", &port_num)) continue; if (!(dev->port_mask & BIT(port_num))) { of_node_put(port); + of_node_put(ports); return -EINVAL; } of_get_phy_mode(port, &dev->ports[port_num].interface); } + of_node_put(ports); + } dev->synclko_125 = of_property_read_bool(dev->dev->of_node, "microchip,synclko-125"); } From 5158e18225c06f39cde0176a431db6e60f52ebc2 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 9 Jan 2022 14:36:26 +0800 Subject: [PATCH 0826/1056] net: skb: introduce kfree_skb_reason() [ Upstream commit c504e5c2f9648a1e5c2be01e8c3f59d394192bd3 ] Introduce the interface kfree_skb_reason(), which is able to pass the reason why the skb is dropped to 'kfree_skb' tracepoint. Add the 'reason' field to 'trace_kfree_skb', therefor user can get more detail information about abnormal skb with 'drop_monitor' or eBPF. All drop reasons are defined in the enum 'skb_drop_reason', and they will be print as string in 'kfree_skb' tracepoint in format of 'reason: XXX'. ( Maybe the reasons should be defined in a uapi header file, so that user space can use them? ) Signed-off-by: Menglong Dong Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 23 ++++++++++++++++++++++- include/trace/events/skb.h | 36 +++++++++++++++++++++++++++++------- net/core/dev.c | 3 ++- net/core/drop_monitor.c | 10 +++++++--- net/core/skbuff.c | 12 +++++++----- 5 files changed, 67 insertions(+), 17 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e213acaa91ecc2..029bc228bcf913 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -304,6 +304,17 @@ struct sk_buff_head { struct sk_buff; +/* The reason of skb drop, which is used in kfree_skb_reason(). + * en...maybe they should be splited by group? + * + * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is + * used to translate the reason to string. + */ +enum skb_drop_reason { + SKB_DROP_REASON_NOT_SPECIFIED, + SKB_DROP_REASON_MAX, +}; + /* To allow 64K frame to be packed as single skb without frag_list we * require 64K/PAGE_SIZE pages plus 1 additional page to allow for * buffers which do not start on a page boundary. @@ -1074,8 +1085,18 @@ static inline bool skb_unref(struct sk_buff *skb) return true; } +void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); + +/** + * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason + * @skb: buffer to free + */ +static inline void kfree_skb(struct sk_buff *skb) +{ + kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); +} + void skb_release_head_state(struct sk_buff *skb); -void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); void skb_tx_error(struct sk_buff *skb); diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 9e92f22eb086c2..294c61bbe44b6e 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -9,29 +9,51 @@ #include #include +#define TRACE_SKB_DROP_REASON \ + EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \ + EMe(SKB_DROP_REASON_MAX, MAX) + +#undef EM +#undef EMe + +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +TRACE_SKB_DROP_REASON + +#undef EM +#undef EMe +#define EM(a, b) { a, #b }, +#define EMe(a, b) { a, #b } + /* * Tracepoint for free an sk_buff: */ TRACE_EVENT(kfree_skb, - TP_PROTO(struct sk_buff *skb, void *location), + TP_PROTO(struct sk_buff *skb, void *location, + enum skb_drop_reason reason), - TP_ARGS(skb, location), + TP_ARGS(skb, location, reason), TP_STRUCT__entry( - __field( void *, skbaddr ) - __field( void *, location ) - __field( unsigned short, protocol ) + __field(void *, skbaddr) + __field(void *, location) + __field(unsigned short, protocol) + __field(enum skb_drop_reason, reason) ), TP_fast_assign( __entry->skbaddr = skb; __entry->location = location; __entry->protocol = ntohs(skb->protocol); + __entry->reason = reason; ), - TP_printk("skbaddr=%p protocol=%u location=%p", - __entry->skbaddr, __entry->protocol, __entry->location) + TP_printk("skbaddr=%p protocol=%u location=%p reason: %s", + __entry->skbaddr, __entry->protocol, __entry->location, + __print_symbolic(__entry->reason, + TRACE_SKB_DROP_REASON)) ); TRACE_EVENT(consume_skb, diff --git a/net/core/dev.c b/net/core/dev.c index 6111506a41053a..12b1811cb488b0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5005,7 +5005,8 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) trace_consume_skb(skb); else - trace_kfree_skb(skb, net_tx_action); + trace_kfree_skb(skb, net_tx_action, + SKB_DROP_REASON_NOT_SPECIFIED); if (skb->fclone != SKB_FCLONE_UNAVAILABLE) __kfree_skb(skb); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 1d99b731e5b215..78202141930f41 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -110,7 +110,8 @@ static u32 net_dm_queue_len = 1000; struct net_dm_alert_ops { void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb, - void *location); + void *location, + enum skb_drop_reason reason); void (*napi_poll_probe)(void *ignore, struct napi_struct *napi, int work, int budget); void (*work_item_func)(struct work_struct *work); @@ -262,7 +263,9 @@ static void trace_drop_common(struct sk_buff *skb, void *location) spin_unlock_irqrestore(&data->lock, flags); } -static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location) +static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, + void *location, + enum skb_drop_reason reason) { trace_drop_common(skb, location); } @@ -494,7 +497,8 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = { static void net_dm_packet_trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, - void *location) + void *location, + enum skb_drop_reason reason) { ktime_t tstamp = ktime_get_real(); struct per_cpu_dm_data *data; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7ef0f5a8ab036b..5ebef94e14dc61 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -759,21 +759,23 @@ void __kfree_skb(struct sk_buff *skb) EXPORT_SYMBOL(__kfree_skb); /** - * kfree_skb - free an sk_buff + * kfree_skb_reason - free an sk_buff with special reason * @skb: buffer to free + * @reason: reason why this skb is dropped * * Drop a reference to the buffer and free it if the usage count has - * hit zero. + * hit zero. Meanwhile, pass the drop reason to 'kfree_skb' + * tracepoint. */ -void kfree_skb(struct sk_buff *skb) +void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) { if (!skb_unref(skb)) return; - trace_kfree_skb(skb, __builtin_return_address(0)); + trace_kfree_skb(skb, __builtin_return_address(0), reason); __kfree_skb(skb); } -EXPORT_SYMBOL(kfree_skb); +EXPORT_SYMBOL(kfree_skb_reason); void kfree_skb_list(struct sk_buff *segs) { From 1629144da3838569a4023d16fd1764039e98705b Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 9 Jan 2022 14:36:27 +0800 Subject: [PATCH 0827/1056] net: skb: use kfree_skb_reason() in tcp_v4_rcv() [ Upstream commit 85125597419aec3aa7b8f3b8713e415f997796f2 ] Replace kfree_skb() with kfree_skb_reason() in tcp_v4_rcv(). Following drop reasons are added: SKB_DROP_REASON_NO_SOCKET SKB_DROP_REASON_PKT_TOO_SMALL SKB_DROP_REASON_TCP_CSUM SKB_DROP_REASON_TCP_FILTER After this patch, 'kfree_skb' event will print message like this: $ TASK-PID CPU# ||||| TIMESTAMP FUNCTION $ | | | ||||| | | -0 [000] ..s1. 36.113438: kfree_skb: skbaddr=(____ptrval____) protocol=2048 location=(____ptrval____) reason: NO_SOCKET The reason of skb drop is printed too. Signed-off-by: Menglong Dong Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 4 ++++ include/trace/events/skb.h | 4 ++++ net/ipv4/tcp_ipv4.c | 14 +++++++++++--- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 029bc228bcf913..5305af6cc86fae 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -312,6 +312,10 @@ struct sk_buff; */ enum skb_drop_reason { SKB_DROP_REASON_NOT_SPECIFIED, + SKB_DROP_REASON_NO_SOCKET, + SKB_DROP_REASON_PKT_TOO_SMALL, + SKB_DROP_REASON_TCP_CSUM, + SKB_DROP_REASON_TCP_FILTER, SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 294c61bbe44b6e..faa7d068a7bc0e 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -11,6 +11,10 @@ #define TRACE_SKB_DROP_REASON \ EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \ + EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \ + EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ + EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ + EM(SKB_DROP_REASON_TCP_FILTER, TCP_FILTER) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b9a9f288bfa600..d901858aa440e7 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1976,8 +1976,10 @@ int tcp_v4_rcv(struct sk_buff *skb) const struct tcphdr *th; bool refcounted; struct sock *sk; + int drop_reason; int ret; + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (skb->pkt_type != PACKET_HOST) goto discard_it; @@ -1989,8 +1991,10 @@ int tcp_v4_rcv(struct sk_buff *skb) th = (const struct tcphdr *)skb->data; - if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) + if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { + drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; goto bad_packet; + } if (!pskb_may_pull(skb, th->doff * 4)) goto discard_it; @@ -2093,8 +2097,10 @@ int tcp_v4_rcv(struct sk_buff *skb) nf_reset_ct(skb); - if (tcp_filter(sk, skb)) + if (tcp_filter(sk, skb)) { + drop_reason = SKB_DROP_REASON_TCP_FILTER; goto discard_and_relse; + } th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); @@ -2131,6 +2137,7 @@ int tcp_v4_rcv(struct sk_buff *skb) return ret; no_tcp_socket: + drop_reason = SKB_DROP_REASON_NO_SOCKET; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; @@ -2138,6 +2145,7 @@ int tcp_v4_rcv(struct sk_buff *skb) if (tcp_checksum_complete(skb)) { csum_error: + drop_reason = SKB_DROP_REASON_TCP_CSUM; trace_tcp_bad_csum(skb); __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: @@ -2148,7 +2156,7 @@ int tcp_v4_rcv(struct sk_buff *skb) discard_it: /* Discard frame. */ - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return 0; discard_and_relse: From b7adefdae236f5e5aff9fa856a9fb6992abd7341 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 9 Jan 2022 14:36:28 +0800 Subject: [PATCH 0828/1056] net: skb: use kfree_skb_reason() in __udp4_lib_rcv() [ Upstream commit 1c7fab70df085d866a3765955f397ca2b4025b15 ] Replace kfree_skb() with kfree_skb_reason() in __udp4_lib_rcv. New drop reason 'SKB_DROP_REASON_UDP_CSUM' is added for udp csum error. Signed-off-by: Menglong Dong Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 1 + include/trace/events/skb.h | 1 + net/ipv4/udp.c | 10 ++++++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5305af6cc86fae..66aac200686837 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -316,6 +316,7 @@ enum skb_drop_reason { SKB_DROP_REASON_PKT_TOO_SMALL, SKB_DROP_REASON_TCP_CSUM, SKB_DROP_REASON_TCP_FILTER, + SKB_DROP_REASON_UDP_CSUM, SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index faa7d068a7bc0e..3e042ca2cedb50 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -15,6 +15,7 @@ EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ EM(SKB_DROP_REASON_TCP_FILTER, TCP_FILTER) \ + EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 835b9d6e4e686e..4ad4daa16cce5b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2411,6 +2411,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, __be32 saddr, daddr; struct net *net = dev_net(skb->dev); bool refcounted; + int drop_reason; + + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; /* * Validate the packet. @@ -2466,6 +2469,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp_lib_checksum_complete(skb)) goto csum_error; + drop_reason = SKB_DROP_REASON_NO_SOCKET; __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); @@ -2473,10 +2477,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, * Hmm. We got an UDP packet to a port to which we * don't wanna listen. Ignore it. */ - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return 0; short_packet: + drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", proto == IPPROTO_UDPLITE ? "Lite" : "", &saddr, ntohs(uh->source), @@ -2489,6 +2494,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, * RFC1122: OK. Discards the bad packet silently (as far as * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ + drop_reason = SKB_DROP_REASON_UDP_CSUM; net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", proto == IPPROTO_UDPLITE ? "Lite" : "", &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), @@ -2496,7 +2502,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); drop: __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return 0; } From 8ac90de3d13709a72537400245448c9275276041 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 27 Jan 2022 17:13:01 +0800 Subject: [PATCH 0829/1056] net: socket: rename SKB_DROP_REASON_SOCKET_FILTER [ Upstream commit 364df53c081d93fcfd6b91085ff2650c7f17b3c7 ] Rename SKB_DROP_REASON_SOCKET_FILTER, which is used as the reason of skb drop out of socket filter before it's part of a released kernel. It will be used for more protocols than just TCP in future series. Signed-off-by: Menglong Dong Reviewed-by: David Ahern Link: https://lore.kernel.org/all/20220127091308.91401-2-imagedong@tencent.com/ Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 2 +- include/trace/events/skb.h | 2 +- net/ipv4/tcp_ipv4.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 66aac200686837..f92f05c9d72d3a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -315,7 +315,7 @@ enum skb_drop_reason { SKB_DROP_REASON_NO_SOCKET, SKB_DROP_REASON_PKT_TOO_SMALL, SKB_DROP_REASON_TCP_CSUM, - SKB_DROP_REASON_TCP_FILTER, + SKB_DROP_REASON_SOCKET_FILTER, SKB_DROP_REASON_UDP_CSUM, SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 3e042ca2cedb50..a8a64b97504d8a 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -14,7 +14,7 @@ EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \ EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ - EM(SKB_DROP_REASON_TCP_FILTER, TCP_FILTER) \ + EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ EMe(SKB_DROP_REASON_MAX, MAX) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d901858aa440e7..235ae91bfd5a06 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2098,7 +2098,7 @@ int tcp_v4_rcv(struct sk_buff *skb) nf_reset_ct(skb); if (tcp_filter(sk, skb)) { - drop_reason = SKB_DROP_REASON_TCP_FILTER; + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto discard_and_relse; } th = (const struct tcphdr *)skb->data; From f3ed670ef8500b41d58805792a5a10d4adb3563b Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 5 Feb 2022 15:47:33 +0800 Subject: [PATCH 0830/1056] net: skb_drop_reason: add document for drop reasons [ Upstream commit 88590b369354092183bcba04e2368010c462557f ] Add document for following existing drop reasons: SKB_DROP_REASON_NOT_SPECIFIED SKB_DROP_REASON_NO_SOCKET SKB_DROP_REASON_PKT_TOO_SMALL SKB_DROP_REASON_TCP_CSUM SKB_DROP_REASON_SOCKET_FILTER SKB_DROP_REASON_UDP_CSUM Signed-off-by: Menglong Dong Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f92f05c9d72d3a..f329c617eb965e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -311,12 +311,12 @@ struct sk_buff; * used to translate the reason to string. */ enum skb_drop_reason { - SKB_DROP_REASON_NOT_SPECIFIED, - SKB_DROP_REASON_NO_SOCKET, - SKB_DROP_REASON_PKT_TOO_SMALL, - SKB_DROP_REASON_TCP_CSUM, - SKB_DROP_REASON_SOCKET_FILTER, - SKB_DROP_REASON_UDP_CSUM, + SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ + SKB_DROP_REASON_NO_SOCKET, /* socket not found */ + SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ + SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ + SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ + SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ SKB_DROP_REASON_MAX, }; From 82cda99184736367a8768782f28cc1d13c9794ed Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 5 Feb 2022 15:47:34 +0800 Subject: [PATCH 0831/1056] net: netfilter: use kfree_drop_reason() for NF_DROP [ Upstream commit 2df3041ba3be950376e8c25a8f6da22f7fcc765c ] Replace kfree_skb() with kfree_skb_reason() in nf_hook_slow() when skb is dropped by reason of NF_DROP. Following new drop reasons are introduced: SKB_DROP_REASON_NETFILTER_DROP Signed-off-by: Menglong Dong Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 1 + include/trace/events/skb.h | 1 + net/netfilter/core.c | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f329c617eb965e..b63da0d1a4b253 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -317,6 +317,7 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ + SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index a8a64b97504d8a..3d89f7b09a438a 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -16,6 +16,7 @@ EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ + EM(SKB_DROP_REASON_NETFILTER_DROP, NETFILTER_DROP) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 60332fdb6dd442..cca0762a901022 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -592,7 +592,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, case NF_ACCEPT: break; case NF_DROP: - kfree_skb(skb); + kfree_skb_reason(skb, + SKB_DROP_REASON_NETFILTER_DROP); ret = NF_DROP_GETERR(verdict); if (ret == 0) ret = -EPERM; From ad5a78ed1356f4f3b397790802f7348b648465a0 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 5 Feb 2022 15:47:35 +0800 Subject: [PATCH 0832/1056] net: ipv4: use kfree_skb_reason() in ip_rcv_core() [ Upstream commit 33cba42985c8144eef78d618fc1e51aaa074b169 ] Replace kfree_skb() with kfree_skb_reason() in ip_rcv_core(). Three new drop reasons are introduced: SKB_DROP_REASON_OTHERHOST SKB_DROP_REASON_IP_CSUM SKB_DROP_REASON_IP_INHDR Signed-off-by: Menglong Dong Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 9 +++++++++ include/trace/events/skb.h | 3 +++ net/ipv4/ip_input.c | 12 ++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b63da0d1a4b253..514fb8074f782e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -318,6 +318,15 @@ enum skb_drop_reason { SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ + SKB_DROP_REASON_OTHERHOST, /* packet don't belong to current + * host (interface is in promisc + * mode) + */ + SKB_DROP_REASON_IP_CSUM, /* IP checksum error */ + SKB_DROP_REASON_IP_INHDR, /* there is something wrong with + * IP header (see + * IPSTATS_MIB_INHDRERRORS) + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 3d89f7b09a438a..f2b1778485f01b 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -17,6 +17,9 @@ EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ EM(SKB_DROP_REASON_NETFILTER_DROP, NETFILTER_DROP) \ + EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST) \ + EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM) \ + EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 3a025c0119718c..7be18de32e16e8 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -436,13 +436,16 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) { const struct iphdr *iph; + int drop_reason; u32 len; /* When the interface is in promisc. mode, drop all the crap * that it receives, do not try to analyse it. */ - if (skb->pkt_type == PACKET_OTHERHOST) + if (skb->pkt_type == PACKET_OTHERHOST) { + drop_reason = SKB_DROP_REASON_OTHERHOST; goto drop; + } __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len); @@ -452,6 +455,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) goto out; } + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto inhdr_error; @@ -488,6 +492,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) len = ntohs(iph->tot_len); if (skb->len < len) { + drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) @@ -516,11 +521,14 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) return skb; csum_error: + drop_reason = SKB_DROP_REASON_IP_CSUM; __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); inhdr_error: + if (drop_reason == SKB_DROP_REASON_NOT_SPECIFIED) + drop_reason = SKB_DROP_REASON_IP_INHDR; __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); drop: - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); out: return NULL; } From b8e68fce6f0e5ad0e04f5d8c9bd4606e93ce461d Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 5 Feb 2022 15:47:36 +0800 Subject: [PATCH 0833/1056] net: ipv4: use kfree_skb_reason() in ip_rcv_finish_core() [ Upstream commit c1f166d1f7eef212096a98b22f5acf92f9af353d ] Replace kfree_skb() with kfree_skb_reason() in ip_rcv_finish_core(), following drop reasons are introduced: SKB_DROP_REASON_IP_RPFILTER SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST Signed-off-by: Menglong Dong Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 9 +++++++++ include/trace/events/skb.h | 3 +++ net/ipv4/ip_input.c | 14 ++++++++++---- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 514fb8074f782e..cbd719e5329a4d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -327,6 +327,15 @@ enum skb_drop_reason { * IP header (see * IPSTATS_MIB_INHDRERRORS) */ + SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed. + * see the document for rp_filter + * in ip-sysctl.rst for more + * information + */ + SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2 + * is multicast, but L3 is + * unicast. + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index f2b1778485f01b..485a1d3034a4b2 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -20,6 +20,9 @@ EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST) \ EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM) \ EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR) \ + EM(SKB_DROP_REASON_IP_RPFILTER, IP_RPFILTER) \ + EM(SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, \ + UNICAST_IN_L2_MULTICAST) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 7be18de32e16e8..d5222c0fa87cb6 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -318,8 +318,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, { const struct iphdr *iph = ip_hdr(skb); int (*edemux)(struct sk_buff *skb); + int err, drop_reason; struct rtable *rt; - int err; + + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (ip_can_use_hint(skb, iph, hint)) { err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos, @@ -396,19 +398,23 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, * so-called "hole-196" attack) so do it for both. */ if (in_dev && - IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) + IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) { + drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST; goto drop; + } } return NET_RX_SUCCESS; drop: - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return NET_RX_DROP; drop_error: - if (err == -EXDEV) + if (err == -EXDEV) { + drop_reason = SKB_DROP_REASON_IP_RPFILTER; __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); + } goto drop; } From 66b73ef38d80a135b090f96948ca51ed1a3238d9 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 13 Jul 2022 12:14:05 +0300 Subject: [PATCH 0834/1056] i2c: mlxcpld: Fix register setting for 400KHz frequency [ Upstream commit e1f77ecc75aaee6bed04e8fd7830e00032af012e ] Fix setting of 'Half Cycle' register for 400KHz frequency. Fixes: fa1049135c15 ("i2c: mlxcpld: Modify register setting for 400KHz frequency") Signed-off-by: Vadim Pasternak Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-mlxcpld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-mlxcpld.c b/drivers/i2c/busses/i2c-mlxcpld.c index 015e11c4663f3b..077d716c73caab 100644 --- a/drivers/i2c/busses/i2c-mlxcpld.c +++ b/drivers/i2c/busses/i2c-mlxcpld.c @@ -49,7 +49,7 @@ #define MLXCPLD_LPCI2C_NACK_IND 2 #define MLXCPLD_I2C_FREQ_1000KHZ_SET 0x04 -#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0c +#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0e #define MLXCPLD_I2C_FREQ_100KHZ_SET 0x42 enum mlxcpld_i2c_frequency { From 196c21deb08a43e7d03910f69232352554dba095 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 14 Jun 2022 17:29:19 -0600 Subject: [PATCH 0835/1056] i2c: cadence: Change large transfer count reset logic to be unconditional [ Upstream commit 4ca8ca873d454635c20d508261bfc0081af75cf8 ] Problems were observed on the Xilinx ZynqMP platform with large I2C reads. When a read of 277 bytes was performed, the controller NAKed the transfer after only 252 bytes were transferred and returned an ENXIO error on the transfer. There is some code in cdns_i2c_master_isr to handle this case by resetting the transfer count in the controller before it reaches 0, to allow larger transfers to work, but it was conditional on the CDNS_I2C_BROKEN_HOLD_BIT quirk being set on the controller, and ZynqMP uses the r1p14 version of the core where this quirk is not being set. The requirement to do this to support larger reads seems like an inherently required workaround due to the core only having an 8-bit transfer size register, so it does not appear that this should be conditional on the broken HOLD bit quirk which is used elsewhere in the driver. Remove the dependency on the CDNS_I2C_BROKEN_HOLD_BIT for this transfer size reset logic to fix this problem. Fixes: 63cab195bf49 ("i2c: removed work arounds in i2c driver for Zynq Ultrascale+ MPSoC") Signed-off-by: Robert Hancock Reviewed-by: Shubhrajyoti Datta Acked-by: Michal Simek Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-cadence.c | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 3d6f8ee355bfce..630cfa4ddd468c 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -388,9 +388,9 @@ static irqreturn_t cdns_i2c_slave_isr(void *ptr) */ static irqreturn_t cdns_i2c_master_isr(void *ptr) { - unsigned int isr_status, avail_bytes, updatetx; + unsigned int isr_status, avail_bytes; unsigned int bytes_to_send; - bool hold_quirk; + bool updatetx; struct cdns_i2c *id = ptr; /* Signal completion only after everything is updated */ int done_flag = 0; @@ -410,11 +410,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) * Check if transfer size register needs to be updated again for a * large data receive operation. */ - updatetx = 0; - if (id->recv_count > id->curr_recv_count) - updatetx = 1; - - hold_quirk = (id->quirks & CDNS_I2C_BROKEN_HOLD_BIT) && updatetx; + updatetx = id->recv_count > id->curr_recv_count; /* When receiving, handle data interrupt and completion interrupt */ if (id->p_recv_buf && @@ -445,7 +441,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) break; } - if (cdns_is_holdquirk(id, hold_quirk)) + if (cdns_is_holdquirk(id, updatetx)) break; } @@ -456,7 +452,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) * maintain transfer size non-zero while performing a large * receive operation. */ - if (cdns_is_holdquirk(id, hold_quirk)) { + if (cdns_is_holdquirk(id, updatetx)) { /* wait while fifo is full */ while (cdns_i2c_readreg(CDNS_I2C_XFER_SIZE_OFFSET) != (id->curr_recv_count - CDNS_I2C_FIFO_DEPTH)) @@ -478,22 +474,6 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) CDNS_I2C_XFER_SIZE_OFFSET); id->curr_recv_count = id->recv_count; } - } else if (id->recv_count && !hold_quirk && - !id->curr_recv_count) { - - /* Set the slave address in address register*/ - cdns_i2c_writereg(id->p_msg->addr & CDNS_I2C_ADDR_MASK, - CDNS_I2C_ADDR_OFFSET); - - if (id->recv_count > CDNS_I2C_TRANSFER_SIZE) { - cdns_i2c_writereg(CDNS_I2C_TRANSFER_SIZE, - CDNS_I2C_XFER_SIZE_OFFSET); - id->curr_recv_count = CDNS_I2C_TRANSFER_SIZE; - } else { - cdns_i2c_writereg(id->recv_count, - CDNS_I2C_XFER_SIZE_OFFSET); - id->curr_recv_count = id->recv_count; - } } /* Clear hold (if not repeated start) and signal completion */ From 34e98fce9a9be91aec8b4f43c4f63ab4ce9aed70 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 13 Jul 2022 15:34:59 +0300 Subject: [PATCH 0836/1056] perf tests: Fix Convert perf time to TSC test for hybrid [ Upstream commit deb44a6249f696106645c63c0603eab08a6122af ] The test does not always correctly determine the number of events for hybrids, nor allow for more than 1 evsel when parsing. Fix by iterating the events actually created and getting the correct evsel for the events processed. Fixes: d9da6f70eb235110 ("perf tests: Support 'Convert perf time to TSC' test for hybrid") Reviewed-by: Kan Liang Signed-off-by: Adrian Hunter Cc: Ian Rogers Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Richter Link: https://lore.kernel.org/r/20220713123459.24145-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/perf-time-to-tsc.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index 7c56bc1f4cff0d..89d25befb1711e 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -20,8 +20,6 @@ #include "tsc.h" #include "mmap.h" #include "tests.h" -#include "pmu.h" -#include "pmu-hybrid.h" #define CHECK__(x) { \ while ((x) < 0) { \ @@ -84,18 +82,8 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe evlist__config(evlist, &opts, NULL); - evsel = evlist__first(evlist); - - evsel->core.attr.comm = 1; - evsel->core.attr.disabled = 1; - evsel->core.attr.enable_on_exec = 0; - - /* - * For hybrid "cycles:u", it creates two events. - * Init the second evsel here. - */ - if (perf_pmu__has_hybrid() && perf_pmu__hybrid_mounted("cpu_atom")) { - evsel = evsel__next(evsel); + /* For hybrid "cycles:u", it creates two events */ + evlist__for_each_entry(evlist, evsel) { evsel->core.attr.comm = 1; evsel->core.attr.disabled = 1; evsel->core.attr.enable_on_exec = 0; @@ -141,10 +129,12 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe goto next_event; if (strcmp(event->comm.comm, comm1) == 0) { + CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event)); CHECK__(evsel__parse_sample(evsel, event, &sample)); comm1_time = sample.time; } if (strcmp(event->comm.comm, comm2) == 0) { + CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event)); CHECK__(evsel__parse_sample(evsel, event, &sample)); comm2_time = sample.time; } From 7c687a893f5cae5ca40d189635602e93af9bab73 Mon Sep 17 00:00:00 2001 From: Junxiao Chang Date: Fri, 15 Jul 2022 15:47:01 +0800 Subject: [PATCH 0837/1056] net: stmmac: fix dma queue left shift overflow issue [ Upstream commit 613b065ca32e90209024ec4a6bb5ca887ee70980 ] When queue number is > 4, left shift overflows due to 32 bits integer variable. Mask calculation is wrong for MTL_RXQ_DMA_MAP1. If CONFIG_UBSAN is enabled, kernel dumps below warning: [ 10.363842] ================================================================== [ 10.363882] UBSAN: shift-out-of-bounds in /build/linux-intel-iotg-5.15-8e6Tf4/ linux-intel-iotg-5.15-5.15.0/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c:224:12 [ 10.363929] shift exponent 40 is too large for 32-bit type 'unsigned int' [ 10.363953] CPU: 1 PID: 599 Comm: NetworkManager Not tainted 5.15.0-1003-intel-iotg [ 10.363956] Hardware name: ADLINK Technology Inc. LEC-EL/LEC-EL, BIOS 0.15.11 12/22/2021 [ 10.363958] Call Trace: [ 10.363960] [ 10.363963] dump_stack_lvl+0x4a/0x5f [ 10.363971] dump_stack+0x10/0x12 [ 10.363974] ubsan_epilogue+0x9/0x45 [ 10.363976] __ubsan_handle_shift_out_of_bounds.cold+0x61/0x10e [ 10.363979] ? wake_up_klogd+0x4a/0x50 [ 10.363983] ? vprintk_emit+0x8f/0x240 [ 10.363986] dwmac4_map_mtl_dma.cold+0x42/0x91 [stmmac] [ 10.364001] stmmac_mtl_configuration+0x1ce/0x7a0 [stmmac] [ 10.364009] ? dwmac410_dma_init_channel+0x70/0x70 [stmmac] [ 10.364020] stmmac_hw_setup.cold+0xf/0xb14 [stmmac] [ 10.364030] ? page_pool_alloc_pages+0x4d/0x70 [ 10.364034] ? stmmac_clear_tx_descriptors+0x6e/0xe0 [stmmac] [ 10.364042] stmmac_open+0x39e/0x920 [stmmac] [ 10.364050] __dev_open+0xf0/0x1a0 [ 10.364054] __dev_change_flags+0x188/0x1f0 [ 10.364057] dev_change_flags+0x26/0x60 [ 10.364059] do_setlink+0x908/0xc40 [ 10.364062] ? do_setlink+0xb10/0xc40 [ 10.364064] ? __nla_validate_parse+0x4c/0x1a0 [ 10.364068] __rtnl_newlink+0x597/0xa10 [ 10.364072] ? __nla_reserve+0x41/0x50 [ 10.364074] ? __kmalloc_node_track_caller+0x1d0/0x4d0 [ 10.364079] ? pskb_expand_head+0x75/0x310 [ 10.364082] ? nla_reserve_64bit+0x21/0x40 [ 10.364086] ? skb_free_head+0x65/0x80 [ 10.364089] ? security_sock_rcv_skb+0x2c/0x50 [ 10.364094] ? __cond_resched+0x19/0x30 [ 10.364097] ? kmem_cache_alloc_trace+0x15a/0x420 [ 10.364100] rtnl_newlink+0x49/0x70 This change fixes MTL_RXQ_DMA_MAP1 mask issue and channel/queue mapping warning. Fixes: d43042f4da3e ("net: stmmac: mapping mtl rx to dma channel") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=216195 Reported-by: Cedric Wassenaar Signed-off-by: Junxiao Chang Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index b2174536898393..412abfabd28bc1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -219,6 +219,9 @@ static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan) if (queue == 0 || queue == 4) { value &= ~MTL_RXQ_DMA_Q04MDMACH_MASK; value |= MTL_RXQ_DMA_Q04MDMACH(chan); + } else if (queue > 4) { + value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue - 4); + value |= MTL_RXQ_DMA_QXMDMACH(chan, queue - 4); } else { value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue); value |= MTL_RXQ_DMA_QXMDMACH(chan, queue); From 2e8c8309329d8349de33098a6c42cade179f8b42 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Fri, 15 Jul 2022 11:42:16 +0300 Subject: [PATCH 0838/1056] net/tls: Fix race in TLS device down flow [ Upstream commit f08d8c1bb97c48f24a82afaa2fd8c140f8d3da8b ] Socket destruction flow and tls_device_down function sync against each other using tls_device_lock and the context refcount, to guarantee the device resources are freed via tls_dev_del() by the end of tls_device_down. In the following unfortunate flow, this won't happen: - refcount is decreased to zero in tls_device_sk_destruct. - tls_device_down starts, skips the context as refcount is zero, going all the way until it flushes the gc work, and returns without freeing the device resources. - only then, tls_device_queue_ctx_destruction is called, queues the gc work and frees the context's device resources. Solve it by decreasing the refcount in the socket's destruction flow under the tls_device_lock, for perfect synchronization. This does not slow down the common likely destructor flow, in which both the refcount is decreased and the spinlock is acquired, anyway. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Reviewed-by: Maxim Mikityanskiy Signed-off-by: Tariq Toukan Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tls/tls_device.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 4775431cbd3866..4e33150cfb9e1c 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -97,13 +97,16 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx) unsigned long flags; spin_lock_irqsave(&tls_device_lock, flags); + if (unlikely(!refcount_dec_and_test(&ctx->refcount))) + goto unlock; + list_move_tail(&ctx->list, &tls_device_gc_list); /* schedule_work inside the spinlock * to make sure tls_device_down waits for that work. */ schedule_work(&tls_device_gc_work); - +unlock: spin_unlock_irqrestore(&tls_device_lock, flags); } @@ -194,8 +197,7 @@ void tls_device_sk_destruct(struct sock *sk) clean_acked_data_disable(inet_csk(sk)); } - if (refcount_dec_and_test(&tls_ctx->refcount)) - tls_device_queue_ctx_destruction(tls_ctx); + tls_device_queue_ctx_destruction(tls_ctx); } EXPORT_SYMBOL_GPL(tls_device_sk_destruct); From 260446eb8e5541402b271343a4516f2b33dec1e4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:41 -0700 Subject: [PATCH 0839/1056] igmp: Fix data-races around sysctl_igmp_llm_reports. [ Upstream commit f6da2267e71106474fbc0943dc24928b9cb79119 ] While reading sysctl_igmp_llm_reports, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. This test can be packed into a helper, so such changes will be in the follow-up series after net is merged into net-next. if (ipv4_is_local_multicast(pmc->multiaddr) && !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) Fixes: df2cf4a78e48 ("IGMP: Inhibit reports for local multicast groups") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/igmp.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 930f6c41f519ce..ccfbc0a8f11c8a 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -467,7 +467,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, if (pmc->multiaddr == IGMP_ALL_HOSTS) return skb; - if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(pmc->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return skb; mtu = READ_ONCE(dev->mtu); @@ -593,7 +594,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(pmc->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) @@ -736,7 +737,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); - if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return 0; if (type == IGMP_HOST_LEAVE_MESSAGE) @@ -920,7 +922,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group) if (group == IGMP_ALL_HOSTS) return false; - if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return false; rcu_read_lock(); @@ -1045,7 +1048,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; spin_lock_bh(&im->lock); if (im->tm_running) @@ -1296,7 +1299,8 @@ static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return; reporter = im->reporter; @@ -1338,7 +1342,8 @@ static void igmp_group_added(struct ip_mc_list *im) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return; if (in_dev->dead) @@ -1642,7 +1647,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev) if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; /* a failover is happening and switches From 2a408a4f423eb531b4aa47fea14ef638d12ba986 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:42 -0700 Subject: [PATCH 0840/1056] igmp: Fix a data-race around sysctl_igmp_max_memberships. [ Upstream commit 6305d821e3b9b5379d348528e5b5faf316383bc2 ] While reading sysctl_igmp_max_memberships, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/igmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index ccfbc0a8f11c8a..8920ae3751d1cd 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2197,7 +2197,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr, count++; } err = -ENOBUFS; - if (count >= net->ipv4.sysctl_igmp_max_memberships) + if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships)) goto done; iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); if (!iml) From 849450279dfcbdfcadfc8b8552e0fe10a8edf33d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:43 -0700 Subject: [PATCH 0841/1056] igmp: Fix data-races around sysctl_igmp_max_msf. [ Upstream commit 6ae0f2e553737b8cce49a1372573c81130ffa80e ] While reading sysctl_igmp_max_msf, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/igmp.c | 2 +- net/ipv4/ip_sockglue.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 8920ae3751d1cd..9f4674244aff33 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2384,7 +2384,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } /* else, add a new source to the filter */ - if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) { + if (psl && psl->sl_count >= READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) { err = -ENOBUFS; goto done; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8268e427f8893d..38f296afb663d4 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -782,7 +782,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) /* numsrc >= (4G-140)/128 overflow in 32 bits */ err = -ENOBUFS; if (gsf->gf_numsrc >= 0x1ffffff || - gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf) + gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf)) goto out_free_gsf; err = -EINVAL; @@ -832,7 +832,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, /* numsrc >= (4G-140)/128 overflow in 32 bits */ err = -ENOBUFS; - if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) + if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf)) goto out_free_gsf; err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode, &gf32->gf_group, gf32->gf_slist_flex); @@ -1242,7 +1242,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, } /* numsrc >= (1G-4) overflow in 32 bits */ if (msf->imsf_numsrc >= 0x3ffffffcU || - msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) { + msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) { kfree(msf); err = -ENOBUFS; break; From 906beda70c2f12a452d202692f0d59f7a40e1d9c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:45 -0700 Subject: [PATCH 0842/1056] tcp: Fix data-races around keepalive sysctl knobs. [ Upstream commit f2f316e287e6c2e3a1c5bab8d9b77ee03daa0463 ] While reading sysctl_tcp_keepalive_(time|probes|intvl), they can be changed concurrently. Thus, we need to add READ_ONCE() to their readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/tcp.h | 9 ++++++--- net/smc/smc_llc.c | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 3b97db2d438f5c..cae0c9102edaaa 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1469,21 +1469,24 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl; + return tp->keepalive_intvl ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); } static inline int keepalive_time_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time; + return tp->keepalive_time ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); } static inline int keepalive_probes(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes; + return tp->keepalive_probes ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); } static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index ee1f0fdba08558..0ef15f8fba9020 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -1787,7 +1787,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) init_waitqueue_head(&lgr->llc_flow_waiter); init_waitqueue_head(&lgr->llc_msg_waiter); mutex_init(&lgr->llc_conf_mutex); - lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time; + lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); } /* called after lgr was removed from lgr_list */ From 78522951d71844aeb851be35e882bdc993f359eb Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:46 -0700 Subject: [PATCH 0843/1056] tcp: Fix data-races around sysctl_tcp_syn(ack)?_retries. [ Upstream commit 20a3b1c0f603e8c55c3396abd12dfcfb523e4d3c ] While reading sysctl_tcp_syn(ack)?_retries, they can be changed concurrently. Thus, we need to add READ_ONCE() to their readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/inet_connection_sock.c | 3 ++- net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_timer.c | 10 +++++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d3bbb344bbe12b..a53f9bf7886f08 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -829,7 +829,8 @@ static void reqsk_timer_handler(struct timer_list *t) icsk = inet_csk(sk_listener); net = sock_net(sk_listener); - max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; + max_syn_ack_retries = icsk->icsk_syn_retries ? : + READ_ONCE(net->ipv4.sysctl_tcp_synack_retries); /* Normally all the openreqs are young and become mature * (i.e. converted to established socket) for first timeout. * If synack was not acknowledged for 1 second, it means diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4ac53c8f0583a9..e22a61b2ba82c8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3974,7 +3974,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = keepalive_probes(tp); break; case TCP_SYNCNT: - val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; + val = icsk->icsk_syn_retries ? : + READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); break; case TCP_LINGER2: val = tp->linger2; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 4f3b9ab222b6e0..a234704e8163ff 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -239,7 +239,8 @@ static int tcp_write_timeout(struct sock *sk) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) __dst_negative_advice(sk); - retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; + retry_until = icsk->icsk_syn_retries ? : + READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); expired = icsk->icsk_retransmits >= retry_until; } else { if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) { @@ -406,12 +407,15 @@ abort: tcp_write_err(sk); static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) { struct inet_connection_sock *icsk = inet_csk(sk); - int max_retries = icsk->icsk_syn_retries ? : - sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ struct tcp_sock *tp = tcp_sk(sk); + int max_retries; req->rsk_ops->syn_ack_timeout(req); + /* add one more retry for fastopen */ + max_retries = icsk->icsk_syn_retries ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1; + if (req->num_timeout >= max_retries) { tcp_write_err(sk); return; From 10a9ba97a40490f586a566c83e5774474db8bdd7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:47 -0700 Subject: [PATCH 0844/1056] tcp: Fix data-races around sysctl_tcp_syncookies. [ Upstream commit f2e383b5bb6bbc60a0b94b87b3e49a2b1aefd11e ] While reading sysctl_tcp_syncookies, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/filter.c | 4 ++-- net/ipv4/syncookies.c | 3 ++- net/ipv4/tcp_input.c | 20 ++++++++++++-------- net/ipv6/syncookies.c | 3 ++- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 8b2bc855714b5a..ac64395611ae3c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6734,7 +6734,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -EINVAL; if (!th->ack || th->rst || th->syn) @@ -6809,7 +6809,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -ENOENT; if (!th->syn || th->ack || th->fin || th->rst) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index fd1dc86ba5124d..8eee771d2acacc 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -342,7 +342,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct flowi4 fl4; u32 tsoff = 0; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ae06923fe8d056..134e36f46e9144 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6735,11 +6735,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; - bool want_cookie = false; struct net *net = sock_net(sk); + bool want_cookie = false; + u8 syncookies; + + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); #ifdef CONFIG_SYN_COOKIES - if (net->ipv4.sysctl_tcp_syncookies) { + if (syncookies) { msg = "Sending cookies"; want_cookie = true; __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); @@ -6747,8 +6750,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) #endif __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - if (!queue->synflood_warned && - net->ipv4.sysctl_tcp_syncookies != 2 && + if (!queue->synflood_warned && syncookies != 2 && xchg(&queue->synflood_warned, 1) == 0) net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", proto, sk->sk_num, msg); @@ -6797,7 +6799,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, struct tcp_sock *tp = tcp_sk(sk); u16 mss; - if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 && !inet_csk_reqsk_queue_is_full(sk)) return 0; @@ -6831,13 +6833,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, bool want_cookie = false; struct dst_entry *dst; struct flowi fl; + u8 syncookies; + + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); /* TW buckets are converted to open requests without * limitations, they conserve resources and peer is * evidently real one. */ - if ((net->ipv4.sysctl_tcp_syncookies == 2 || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { + if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); if (!want_cookie) goto drop; @@ -6887,7 +6891,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (!want_cookie && !isn) { /* Kill the following clause, if you dislike this way. */ - if (!net->ipv4.sysctl_tcp_syncookies && + if (!syncookies && (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (net->ipv4.sysctl_max_syn_backlog >> 2)) && !tcp_peer_is_proven(req, dst)) { diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ca92dd6981dea9..12ae817aaf2ec8 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -141,7 +141,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) __u8 rcv_wscale; u32 tsoff = 0; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) From fcf6c6d8aeffebca66f37b17ef1b57112e5e09c1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:48 -0700 Subject: [PATCH 0845/1056] tcp: Fix data-races around sysctl_tcp_migrate_req. [ Upstream commit 4177f545895b1da08447a80692f30617154efa6e ] While reading sysctl_tcp_migrate_req, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: f9ac779f881c ("net: Introduce net.ipv4.tcp_migrate_req.") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/sock_reuseport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 3f00a28fe762af..5daa1fa542490e 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -387,7 +387,7 @@ void reuseport_stop_listen_sock(struct sock *sk) prog = rcu_dereference_protected(reuse->prog, lockdep_is_held(&reuseport_lock)); - if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req || + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req) || (prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) { /* Migration capable, move sk from the listening section * to the closed section. @@ -545,7 +545,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk, hash = migrating_sk->sk_hash; prog = rcu_dereference(reuse->prog); if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) { - if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req)) goto select_by_hash; goto failure; } From f6ce6556958c080df6ac123b9d5371b170c4f15b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:49 -0700 Subject: [PATCH 0846/1056] tcp: Fix data-races around sysctl_tcp_reordering. [ Upstream commit 46778cd16e6a5ad1b2e3a91f6c057c907379418e ] While reading sysctl_tcp_reordering, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 10 +++++++--- net/ipv4/tcp_metrics.c | 3 ++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e22a61b2ba82c8..480fac19a074f3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -447,7 +447,7 @@ void tcp_init_sock(struct sock *sk) tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; - tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; + tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering); tcp_assign_congestion_control(sk); tp->tsoffset = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 134e36f46e9144..06802295e170e9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2131,6 +2131,7 @@ void tcp_enter_loss(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; + u8 reordering; tcp_timeout_mark_lost(sk); @@ -2151,10 +2152,12 @@ void tcp_enter_loss(struct sock *sk) /* Timeout in disordered state after receiving substantial DUPACKs * suggests that the degree of reordering is over-estimated. */ + reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering); if (icsk->icsk_ca_state <= TCP_CA_Disorder && - tp->sacked_out >= net->ipv4.sysctl_tcp_reordering) + tp->sacked_out >= reordering) tp->reordering = min_t(unsigned int, tp->reordering, - net->ipv4.sysctl_tcp_reordering); + reordering); + tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; tcp_ecn_queue_cwr(tp); @@ -3457,7 +3460,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) * new SACK or ECE mark may first advance cwnd here and later reduce * cwnd in tcp_fastretrans_alert() based on more states. */ - if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering) + if (tcp_sk(sk)->reordering > + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering)) return flag & FLAG_FORWARD_PROGRESS; return flag & FLAG_DATA_ACKED; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 7029b0e98edb28..a501150deaa3b3 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -428,7 +428,8 @@ void tcp_update_metrics(struct sock *sk) if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) { val = tcp_metric_get(tm, TCP_METRIC_REORDERING); if (val < tp->reordering && - tp->reordering != net->ipv4.sysctl_tcp_reordering) + tp->reordering != + READ_ONCE(net->ipv4.sysctl_tcp_reordering)) tcp_metric_set(tm, TCP_METRIC_REORDERING, tp->reordering); } From e816f8024617afd73cc755e79e4e15b3ceabc4f8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:50 -0700 Subject: [PATCH 0847/1056] tcp: Fix data-races around some timeout sysctl knobs. [ Upstream commit 39e24435a776e9de5c6dd188836cf2523547804b ] While reading these sysctl knobs, they can be changed concurrently. Thus, we need to add READ_ONCE() to their readers. - tcp_retries1 - tcp_retries2 - tcp_orphan_retries - tcp_fin_timeout Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/tcp.h | 3 ++- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/ipv4/tcp_timer.c | 10 +++++----- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index cae0c9102edaaa..caecc020e521c8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1499,7 +1499,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout); const int rto = inet_csk(sk)->icsk_rto; if (fin_timeout < (rto << 2) - (rto >> 1)) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 480fac19a074f3..f853f34dfb79fa 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3980,7 +3980,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_LINGER2: val = tp->linger2; if (val >= 0) - val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ; + val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; break; case TCP_DEFER_ACCEPT: val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3fa2bfbc250d4e..fcccf56ae9f79b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -4093,7 +4093,7 @@ void tcp_send_probe0(struct sock *sk) icsk->icsk_probes_out++; if (err <= 0) { - if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) + if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2)) icsk->icsk_backoff++; timeout = tcp_probe0_when(sk, TCP_RTO_MAX); } else { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index a234704e8163ff..ec5277becc6a1f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -143,7 +143,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) */ static int tcp_orphan_retries(struct sock *sk, bool alive) { - int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */ + int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */ /* We know from an ICMP that something is wrong. */ if (sk->sk_err_soft && !alive) @@ -243,14 +243,14 @@ static int tcp_write_timeout(struct sock *sk) READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); expired = icsk->icsk_retransmits >= retry_until; } else { - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) { + if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); __dst_negative_advice(sk); } - retry_until = net->ipv4.sysctl_tcp_retries2; + retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { const bool alive = icsk->icsk_rto < TCP_RTO_MAX; @@ -381,7 +381,7 @@ static void tcp_probe_timer(struct sock *sk) msecs_to_jiffies(icsk->icsk_user_timeout)) goto abort; - max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; + max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; @@ -589,7 +589,7 @@ void tcp_retransmit_timer(struct sock *sk) } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX); - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0)) + if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0)) __sk_dst_reset(sk); out:; From 80d4d0c461674eea87f0977e12a2ecd334b9b79c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:51 -0700 Subject: [PATCH 0848/1056] tcp: Fix a data-race around sysctl_tcp_notsent_lowat. [ Upstream commit 55be873695ed8912eb77ff46d1d1cadf028bd0f3 ] While reading sysctl_tcp_notsent_lowat, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: c9bee3b7fdec ("tcp: TCP_NOTSENT_LOWAT socket option") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index caecc020e521c8..0c609d10c32046 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1994,7 +1994,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; + return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); } bool tcp_stream_memory_free(const struct sock *sk, int wake); From 58d5ea71aaa66767a9d2dbb527c8fdc8ca0c98fe Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:52 -0700 Subject: [PATCH 0849/1056] tcp: Fix a data-race around sysctl_tcp_tw_reuse. [ Upstream commit cbfc6495586a3f09f6f07d9fb3c7cafe807e3c55 ] While reading sysctl_tcp_tw_reuse, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 235ae91bfd5a06..fba02cf6b4686f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -108,10 +108,10 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb) int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) { + int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse); const struct inet_timewait_sock *tw = inet_twsk(sktw); const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); struct tcp_sock *tp = tcp_sk(sk); - int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse; if (reuse == 2) { /* Still does not detect *everything* that goes through From 3ae85dc62a023ca0956f8f26628494af51a6a824 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:53 -0700 Subject: [PATCH 0850/1056] tcp: Fix data-races around sysctl_max_syn_backlog. [ Upstream commit 79539f34743d3e14cc1fa6577d326a82cc64d62f ] While reading sysctl_max_syn_backlog, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 06802295e170e9..d9e534c6fd0cc8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6894,10 +6894,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); if (!want_cookie && !isn) { + int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog); + /* Kill the following clause, if you dislike this way. */ if (!syncookies && - (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (net->ipv4.sysctl_max_syn_backlog >> 2)) && + (max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (max_syn_backlog >> 2)) && !tcp_peer_is_proven(req, dst)) { /* Without syncookies last quarter of * backlog is filled with destinations, From 539d9ab79eba3974b479cad61a8688c41fe62e12 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:54 -0700 Subject: [PATCH 0851/1056] tcp: Fix data-races around sysctl_tcp_fastopen. [ Upstream commit 5a54213318c43f4009ae158347aa6016e3b9b55a ] While reading sysctl_tcp_fastopen, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 2100c8d2d9db ("net-tcp: Fast Open base") Signed-off-by: Kuniyuki Iwashima Acked-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/af_inet.c | 2 +- net/ipv4/tcp.c | 6 ++++-- net/ipv4/tcp_fastopen.c | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 781c595f6880d5..e4b2ced66261b0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -220,7 +220,7 @@ int inet_listen(struct socket *sock, int backlog) * because the socket was in TCP_LISTEN state previously but * was shutdown() rather than close(). */ - tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f853f34dfb79fa..1abdb871265596 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1159,7 +1159,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, struct sockaddr *uaddr = msg->msg_name; int err, flags; - if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || + if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & + TFO_CLIENT_ENABLE) || (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && uaddr->sa_family == AF_UNSPEC)) return -EOPNOTSUPP; @@ -3626,7 +3627,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, case TCP_FASTOPEN_CONNECT: if (val > 1 || val < 0) { err = -EINVAL; - } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { + } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) & + TFO_CLIENT_ENABLE) { if (sk->sk_state == TCP_CLOSE) tp->fastopen_connect = val; else diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 59412d6354a01a..936544a4753e97 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -338,7 +338,7 @@ static bool tcp_fastopen_no_cookie(const struct sock *sk, const struct dst_entry *dst, int flag) { - return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) || + return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) || tcp_sk(sk)->fastopen_no_cookie || (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE)); } @@ -353,7 +353,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, const struct dst_entry *dst) { bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; - int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); struct tcp_fastopen_cookie valid_foc = { .len = -1 }; struct sock *child; int ret = 0; From a77a75a0e7f397550ab039f96115103e78dd5c69 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:55 -0700 Subject: [PATCH 0852/1056] tcp: Fix data-races around sysctl_tcp_fastopen_blackhole_timeout. [ Upstream commit 021266ec640c7a4527e6cd4b7349a512b351de1d ] While reading sysctl_tcp_fastopen_blackhole_timeout, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: cf1ef3f0719b ("net/tcp_fastopen: Disable active side TFO in certain scenarios") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_fastopen.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 936544a4753e97..6e0a8ef5e816f3 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -495,7 +495,7 @@ void tcp_fastopen_active_disable(struct sock *sk) { struct net *net = sock_net(sk); - if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)) return; /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */ @@ -516,7 +516,8 @@ void tcp_fastopen_active_disable(struct sock *sk) */ bool tcp_fastopen_active_should_disable(struct sock *sk) { - unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout; + unsigned int tfo_bh_timeout = + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout); unsigned long timeout; int tfo_da_times; int multiplier; From 2918419c06088f6709ceb543feb01752779ade4c Mon Sep 17 00:00:00 2001 From: Przemyslaw Patynowski Date: Fri, 24 Jun 2022 17:33:01 -0700 Subject: [PATCH 0853/1056] iavf: Fix handling of dummy receive descriptors [ Upstream commit a9f49e0060301a9bfebeca76739158d0cf91cdf6 ] Fix memory leak caused by not handling dummy receive descriptor properly. iavf_get_rx_buffer now sets the rx_buffer return value for dummy receive descriptors. Without this patch, when the hardware writes a dummy descriptor, iavf would not free the page allocated for the previous receive buffer. This is an unlikely event but can still happen. [Jesse: massaged commit message] Fixes: efa14c398582 ("iavf: allow null RX descriptors") Signed-off-by: Przemyslaw Patynowski Signed-off-by: Jesse Brandeburg Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_txrx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 3525eab8e9f9a8..5448ed0e0357f6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1250,11 +1250,10 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring, { struct iavf_rx_buffer *rx_buffer; - if (!size) - return NULL; - rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; prefetchw(rx_buffer->page); + if (!size) + return rx_buffer; /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, From 2cbb165131766799297333221d383cd08e938cf6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 5 Nov 2021 14:42:32 +0200 Subject: [PATCH 0854/1056] pinctrl: armada-37xx: Use temporary variable for struct device [ Upstream commit 50cf2ed284e49028a885aa56c3ea50714c635879 ] Use temporary variable for struct device to make code neater. Signed-off-by: Andy Shevchenko Reviewed-by: Gregory CLEMENT Signed-off-by: Sasha Levin --- drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 56 +++++++++------------ 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index 85a0052bb0e62c..e1f76b4ddf237c 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -341,12 +341,12 @@ static int armada_37xx_pmx_set_by_name(struct pinctrl_dev *pctldev, struct armada_37xx_pin_group *grp) { struct armada_37xx_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); + struct device *dev = info->dev; unsigned int reg = SELECTION; unsigned int mask = grp->reg_mask; int func, val; - dev_dbg(info->dev, "enable function %s group %s\n", - name, grp->name); + dev_dbg(dev, "enable function %s group %s\n", name, grp->name); func = match_string(grp->funcs, NB_FUNCS, name); if (func < 0) @@ -722,16 +722,16 @@ static unsigned int armada_37xx_irq_startup(struct irq_data *d) static int armada_37xx_irqchip_register(struct platform_device *pdev, struct armada_37xx_pinctrl *info) { - struct device_node *np = info->dev->of_node; struct gpio_chip *gc = &info->gpio_chip; struct irq_chip *irqchip = &info->irq_chip; struct gpio_irq_chip *girq = &gc->irq; struct device *dev = &pdev->dev; + struct device_node *np; struct resource res; int ret = -ENODEV, i, nr_irq_parent; /* Check if we have at least one gpio-controller child node */ - for_each_child_of_node(info->dev->of_node, np) { + for_each_child_of_node(dev->of_node, np) { if (of_property_read_bool(np, "gpio-controller")) { ret = 0; break; @@ -750,12 +750,12 @@ static int armada_37xx_irqchip_register(struct platform_device *pdev, return 0; } - if (of_address_to_resource(info->dev->of_node, 1, &res)) { + if (of_address_to_resource(dev->of_node, 1, &res)) { dev_err(dev, "cannot find IO resource\n"); return -ENOENT; } - info->base = devm_ioremap_resource(info->dev, &res); + info->base = devm_ioremap_resource(dev, &res); if (IS_ERR(info->base)) return PTR_ERR(info->base); @@ -774,8 +774,7 @@ static int armada_37xx_irqchip_register(struct platform_device *pdev, * the chained irq with all of them. */ girq->num_parents = nr_irq_parent; - girq->parents = devm_kcalloc(&pdev->dev, nr_irq_parent, - sizeof(*girq->parents), GFP_KERNEL); + girq->parents = devm_kcalloc(dev, nr_irq_parent, sizeof(*girq->parents), GFP_KERNEL); if (!girq->parents) return -ENOMEM; for (i = 0; i < nr_irq_parent; i++) { @@ -794,11 +793,12 @@ static int armada_37xx_irqchip_register(struct platform_device *pdev, static int armada_37xx_gpiochip_register(struct platform_device *pdev, struct armada_37xx_pinctrl *info) { + struct device *dev = &pdev->dev; struct device_node *np; struct gpio_chip *gc; int ret = -ENODEV; - for_each_child_of_node(info->dev->of_node, np) { + for_each_child_of_node(dev->of_node, np) { if (of_find_property(np, "gpio-controller", NULL)) { ret = 0; break; @@ -811,19 +811,16 @@ static int armada_37xx_gpiochip_register(struct platform_device *pdev, gc = &info->gpio_chip; gc->ngpio = info->data->nr_pins; - gc->parent = &pdev->dev; + gc->parent = dev; gc->base = -1; gc->of_node = np; gc->label = info->data->name; ret = armada_37xx_irqchip_register(pdev, info); - if (ret) - return ret; - ret = devm_gpiochip_add_data(&pdev->dev, gc, info); if (ret) return ret; - return 0; + return devm_gpiochip_add_data(dev, gc, info); } /** @@ -874,13 +871,13 @@ static int armada_37xx_add_function(struct armada_37xx_pmx_func *funcs, static int armada_37xx_fill_group(struct armada_37xx_pinctrl *info) { int n, num = 0, funcsize = info->data->nr_pins; + struct device *dev = info->dev; for (n = 0; n < info->ngroups; n++) { struct armada_37xx_pin_group *grp = &info->groups[n]; int i, j, f; - grp->pins = devm_kcalloc(info->dev, - grp->npins + grp->extra_npins, + grp->pins = devm_kcalloc(dev, grp->npins + grp->extra_npins, sizeof(*grp->pins), GFP_KERNEL); if (!grp->pins) @@ -898,8 +895,7 @@ static int armada_37xx_fill_group(struct armada_37xx_pinctrl *info) ret = armada_37xx_add_function(info->funcs, &funcsize, grp->funcs[f]); if (ret == -EOVERFLOW) - dev_err(info->dev, - "More functions than pins(%d)\n", + dev_err(dev, "More functions than pins(%d)\n", info->data->nr_pins); if (ret < 0) continue; @@ -925,6 +921,7 @@ static int armada_37xx_fill_group(struct armada_37xx_pinctrl *info) static int armada_37xx_fill_func(struct armada_37xx_pinctrl *info) { struct armada_37xx_pmx_func *funcs = info->funcs; + struct device *dev = info->dev; int n; for (n = 0; n < info->nfuncs; n++) { @@ -932,8 +929,7 @@ static int armada_37xx_fill_func(struct armada_37xx_pinctrl *info) const char **groups; int g; - funcs[n].groups = devm_kcalloc(info->dev, - funcs[n].ngroups, + funcs[n].groups = devm_kcalloc(dev, funcs[n].ngroups, sizeof(*(funcs[n].groups)), GFP_KERNEL); if (!funcs[n].groups) @@ -962,6 +958,7 @@ static int armada_37xx_pinctrl_register(struct platform_device *pdev, const struct armada_37xx_pin_data *pin_data = info->data; struct pinctrl_desc *ctrldesc = &info->pctl; struct pinctrl_pin_desc *pindesc, *pdesc; + struct device *dev = &pdev->dev; int pin, ret; info->groups = pin_data->groups; @@ -973,9 +970,7 @@ static int armada_37xx_pinctrl_register(struct platform_device *pdev, ctrldesc->pmxops = &armada_37xx_pmx_ops; ctrldesc->confops = &armada_37xx_pinconf_ops; - pindesc = devm_kcalloc(&pdev->dev, - pin_data->nr_pins, sizeof(*pindesc), - GFP_KERNEL); + pindesc = devm_kcalloc(dev, pin_data->nr_pins, sizeof(*pindesc), GFP_KERNEL); if (!pindesc) return -ENOMEM; @@ -994,14 +989,10 @@ static int armada_37xx_pinctrl_register(struct platform_device *pdev, * we allocate functions for number of pins and hope there are * fewer unique functions than pins available */ - info->funcs = devm_kcalloc(&pdev->dev, - pin_data->nr_pins, - sizeof(struct armada_37xx_pmx_func), - GFP_KERNEL); + info->funcs = devm_kcalloc(dev, pin_data->nr_pins, sizeof(*info->funcs), GFP_KERNEL); if (!info->funcs) return -ENOMEM; - ret = armada_37xx_fill_group(info); if (ret) return ret; @@ -1010,9 +1001,9 @@ static int armada_37xx_pinctrl_register(struct platform_device *pdev, if (ret) return ret; - info->pctl_dev = devm_pinctrl_register(&pdev->dev, ctrldesc, info); + info->pctl_dev = devm_pinctrl_register(dev, ctrldesc, info); if (IS_ERR(info->pctl_dev)) { - dev_err(&pdev->dev, "could not register pinctrl driver\n"); + dev_err(dev, "could not register pinctrl driver\n"); return PTR_ERR(info->pctl_dev); } @@ -1143,8 +1134,7 @@ static int __init armada_37xx_pinctrl_probe(struct platform_device *pdev) struct regmap *regmap; int ret; - info = devm_kzalloc(dev, sizeof(struct armada_37xx_pinctrl), - GFP_KERNEL); + info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; @@ -1152,7 +1142,7 @@ static int __init armada_37xx_pinctrl_probe(struct platform_device *pdev) regmap = syscon_node_to_regmap(np); if (IS_ERR(regmap)) { - dev_err(&pdev->dev, "cannot get regmap\n"); + dev_err(dev, "cannot get regmap\n"); return PTR_ERR(regmap); } info->regmap = regmap; From 47c9f6bfc23488169ee4ee10da806bc17431db63 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 5 Nov 2021 14:42:33 +0200 Subject: [PATCH 0855/1056] pinctrl: armada-37xx: Make use of the devm_platform_ioremap_resource() [ Upstream commit 49bdef501728acbfadc7eeafafb4f6c3fea415eb ] Use the devm_platform_ioremap_resource() helper instead of calling of_address_to_resource() and devm_ioremap_resource() separately. Signed-off-by: Andy Shevchenko Reviewed-by: Gregory CLEMENT Signed-off-by: Sasha Levin --- drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index e1f76b4ddf237c..40bcf05123ebfd 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -727,7 +727,6 @@ static int armada_37xx_irqchip_register(struct platform_device *pdev, struct gpio_irq_chip *girq = &gc->irq; struct device *dev = &pdev->dev; struct device_node *np; - struct resource res; int ret = -ENODEV, i, nr_irq_parent; /* Check if we have at least one gpio-controller child node */ @@ -750,12 +749,7 @@ static int armada_37xx_irqchip_register(struct platform_device *pdev, return 0; } - if (of_address_to_resource(dev->of_node, 1, &res)) { - dev_err(dev, "cannot find IO resource\n"); - return -ENOENT; - } - - info->base = devm_ioremap_resource(dev, &res); + info->base = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(info->base)) return PTR_ERR(info->base); From 9d674108fe76a91be66b12ba1598221497072dae Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 5 Nov 2021 14:42:34 +0200 Subject: [PATCH 0856/1056] pinctrl: armada-37xx: Convert to use dev_err_probe() [ Upstream commit 06cb10ea0cd5c5f4db9627a33ab47fec32cb5960 ] It's fine to call dev_err_probe() in ->probe() when error code is known. Convert the driver to use dev_err_probe(). Signed-off-by: Andy Shevchenko Reviewed-by: Gregory CLEMENT Signed-off-by: Sasha Levin --- drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index 40bcf05123ebfd..7d0d2771a9acad 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -736,10 +736,8 @@ static int armada_37xx_irqchip_register(struct platform_device *pdev, break; } } - if (ret) { - dev_err(dev, "no gpio-controller child node\n"); - return ret; - } + if (ret) + return dev_err_probe(dev, ret, "no gpio-controller child node\n"); nr_irq_parent = of_irq_count(np); spin_lock_init(&info->irq_lock); @@ -996,10 +994,8 @@ static int armada_37xx_pinctrl_register(struct platform_device *pdev, return ret; info->pctl_dev = devm_pinctrl_register(dev, ctrldesc, info); - if (IS_ERR(info->pctl_dev)) { - dev_err(dev, "could not register pinctrl driver\n"); - return PTR_ERR(info->pctl_dev); - } + if (IS_ERR(info->pctl_dev)) + return dev_err_probe(dev, PTR_ERR(info->pctl_dev), "could not register pinctrl driver\n"); return 0; } @@ -1135,10 +1131,8 @@ static int __init armada_37xx_pinctrl_probe(struct platform_device *pdev) info->dev = dev; regmap = syscon_node_to_regmap(np); - if (IS_ERR(regmap)) { - dev_err(dev, "cannot get regmap\n"); - return PTR_ERR(regmap); - } + if (IS_ERR(regmap)) + return dev_err_probe(dev, PTR_ERR(regmap), "cannot get regmap\n"); info->regmap = regmap; info->data = of_device_get_match_data(dev); From fad55088ad7384d9d26271e0fcb2ee36bb5dabe5 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 17 Jul 2022 02:37:45 +0300 Subject: [PATCH 0857/1056] pinctrl: armada-37xx: use raw spinlocks for regmap to avoid invalid wait context [ Upstream commit 4546760619cfa9b718fe2059ceb07101cf9ff61e ] The irqchip->irq_set_type method is called by __irq_set_trigger() under the desc->lock raw spinlock. The armada-37xx implementation, armada_37xx_irq_set_type(), uses an MMIO regmap created by of_syscon_register(), which uses plain spinlocks (the kind that are sleepable on RT). Therefore, this is an invalid locking scheme for which we get a kernel splat stating just that ("[ BUG: Invalid wait context ]"), because the context in which the plain spinlock may sleep is atomic due to the raw spinlock. We need to go raw spinlocks all the way. Make this driver create its own MMIO regmap, with use_raw_spinlock=true, and stop relying on syscon to provide it. This patch depends on commit 67021f25d952 ("regmap: teach regmap to use raw spinlocks if requested in the config"). Cc: # 5.15+ Fixes: 2f227605394b ("pinctrl: armada-37xx: Add irqchip support") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220716233745.1704677-3-vladimir.oltean@nxp.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 27 ++++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index 7d0d2771a9acad..7338bc353347ea 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -1116,25 +1116,40 @@ static const struct of_device_id armada_37xx_pinctrl_of_match[] = { { }, }; +static const struct regmap_config armada_37xx_pinctrl_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .use_raw_spinlock = true, +}; + static int __init armada_37xx_pinctrl_probe(struct platform_device *pdev) { struct armada_37xx_pinctrl *info; struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; struct regmap *regmap; + void __iomem *base; int ret; + base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); + if (IS_ERR(base)) { + dev_err(dev, "failed to ioremap base address: %pe\n", base); + return PTR_ERR(base); + } + + regmap = devm_regmap_init_mmio(dev, base, + &armada_37xx_pinctrl_regmap_config); + if (IS_ERR(regmap)) { + dev_err(dev, "failed to create regmap: %pe\n", regmap); + return PTR_ERR(regmap); + } + info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; info->dev = dev; - - regmap = syscon_node_to_regmap(np); - if (IS_ERR(regmap)) - return dev_err_probe(dev, PTR_ERR(regmap), "cannot get regmap\n"); info->regmap = regmap; - info->data = of_device_get_match_data(dev); ret = armada_37xx_pinctrl_register(pdev, info); From 893ea2b35bb9338444f76fa2eb8deae7a0415afa Mon Sep 17 00:00:00 2001 From: Dawid Lukwinski Date: Fri, 15 Jul 2022 14:45:41 -0700 Subject: [PATCH 0858/1056] i40e: Fix erroneous adapter reinitialization during recovery process [ Upstream commit f838a63369818faadec4ad1736cfbd20ab5da00e ] Fix an issue when driver incorrectly detects state of recovery process and erroneously reinitializes interrupts, which results in a kernel error and call trace message. The issue was caused by a combination of two factors: 1. Assuming the EMP reset issued after completing firmware recovery means the whole recovery process is complete. 2. Erroneous reinitialization of interrupt vector after detecting the above mentioned EMP reset. Fixes (1) by changing how recovery state change is detected and (2) by adjusting the conditional expression to ensure using proper interrupt reinitialization method, depending on the situation. Fixes: 4ff0ee1af016 ("i40e: Introduce recovery mode support") Signed-off-by: Dawid Lukwinski Signed-off-by: Jan Sokolowski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20220715214542.2968762-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 02594e4d6258c6..c801b128e5b2bb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -10631,7 +10631,7 @@ static int i40e_reset(struct i40e_pf *pf) **/ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) { - int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state); + const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf); struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_hw *hw = &pf->hw; i40e_status ret; @@ -10639,13 +10639,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) int v; if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && - i40e_check_recovery_mode(pf)) { + is_recovery_mode_reported) i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev); - } if (test_bit(__I40E_DOWN, pf->state) && - !test_bit(__I40E_RECOVERY_MODE, pf->state) && - !old_recovery_mode_bit) + !test_bit(__I40E_RECOVERY_MODE, pf->state)) goto clear_recovery; dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n"); @@ -10672,13 +10670,12 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) * accordingly with regard to resources initialization * and deinitialization */ - if (test_bit(__I40E_RECOVERY_MODE, pf->state) || - old_recovery_mode_bit) { + if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { if (i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities)) goto end_unlock; - if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { + if (is_recovery_mode_reported) { /* we're staying in recovery mode so we'll reinitialize * misc vector here */ From 16f929a5e76fd047fd8697e1e568bdd7d771955c Mon Sep 17 00:00:00 2001 From: Piotr Skajewski Date: Fri, 15 Jul 2022 14:44:56 -0700 Subject: [PATCH 0859/1056] ixgbe: Add locking to prevent panic when setting sriov_numvfs to zero [ Upstream commit 1e53834ce541d4fe271cdcca7703e50be0a44f8a ] It is possible to disable VFs while the PF driver is processing requests from the VF driver. This can result in a panic. BUG: unable to handle kernel paging request at 000000000000106c PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 8 PID: 0 Comm: swapper/8 Kdump: loaded Tainted: G I --------- - Hardware name: Dell Inc. PowerEdge R740/06WXJT, BIOS 2.8.2 08/27/2020 RIP: 0010:ixgbe_msg_task+0x4c8/0x1690 [ixgbe] Code: 00 00 48 8d 04 40 48 c1 e0 05 89 7c 24 24 89 fd 48 89 44 24 10 83 ff 01 0f 84 b8 04 00 00 4c 8b 64 24 10 4d 03 a5 48 22 00 00 <41> 80 7c 24 4c 00 0f 84 8a 03 00 00 0f b7 c7 83 f8 08 0f 84 8f 0a RSP: 0018:ffffb337869f8df8 EFLAGS: 00010002 RAX: 0000000000001020 RBX: 0000000000000000 RCX: 000000000000002b RDX: 0000000000000002 RSI: 0000000000000008 RDI: 0000000000000006 RBP: 0000000000000006 R08: 0000000000000002 R09: 0000000000029780 R10: 00006957d8f42832 R11: 0000000000000000 R12: 0000000000001020 R13: ffff8a00e8978ac0 R14: 000000000000002b R15: ffff8a00e8979c80 FS: 0000000000000000(0000) GS:ffff8a07dfd00000(0000) knlGS:00000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000106c CR3: 0000000063e10004 CR4: 00000000007726e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? ttwu_do_wakeup+0x19/0x140 ? try_to_wake_up+0x1cd/0x550 ? ixgbevf_update_xcast_mode+0x71/0xc0 [ixgbevf] ixgbe_msix_other+0x17e/0x310 [ixgbe] __handle_irq_event_percpu+0x40/0x180 handle_irq_event_percpu+0x30/0x80 handle_irq_event+0x36/0x53 handle_edge_irq+0x82/0x190 handle_irq+0x1c/0x30 do_IRQ+0x49/0xd0 common_interrupt+0xf/0xf This can be eventually be reproduced with the following script: while : do echo 63 > /sys/class/net//device/sriov_numvfs sleep 1 echo 0 > /sys/class/net//device/sriov_numvfs sleep 1 done Add lock when disabling SR-IOV to prevent process VF mailbox communication. Fixes: d773d1310625 ("ixgbe: Fix memory leak when SR-IOV VFs are direct assigned") Signed-off-by: Piotr Skajewski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20220715214456.2968711-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 1 + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +++ drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 6 ++++++ 3 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index a604552fa634e8..c375a5d54b40df 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -770,6 +770,7 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_IPSEC struct ixgbe_ipsec *ipsec; #endif /* CONFIG_IXGBE_IPSEC */ + spinlock_t vfs_lock; }; static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 750b02bb2fdc28..8cb20af51ecd62 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6397,6 +6397,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, /* n-tuple support exists, always init our spinlock */ spin_lock_init(&adapter->fdir_perfect_lock); + /* init spinlock to avoid concurrency of VF resources */ + spin_lock_init(&adapter->vfs_lock); + #ifdef CONFIG_IXGBE_DCB ixgbe_init_dcb(adapter); #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index aaebdae8b5fffe..0078ae59261646 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -204,10 +204,13 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs) int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) { unsigned int num_vfs = adapter->num_vfs, vf; + unsigned long flags; int rss; + spin_lock_irqsave(&adapter->vfs_lock, flags); /* set num VFs to 0 to prevent access to vfinfo */ adapter->num_vfs = 0; + spin_unlock_irqrestore(&adapter->vfs_lock, flags); /* put the reference to all of the vf devices */ for (vf = 0; vf < num_vfs; ++vf) { @@ -1305,8 +1308,10 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf) void ixgbe_msg_task(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; + unsigned long flags; u32 vf; + spin_lock_irqsave(&adapter->vfs_lock, flags); for (vf = 0; vf < adapter->num_vfs; vf++) { /* process any reset requests */ if (!ixgbe_check_for_rst(hw, vf)) @@ -1320,6 +1325,7 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter) if (!ixgbe_check_for_ack(hw, vf)) ixgbe_rcv_ack_from_vf(adapter, vf); } + spin_unlock_irqrestore(&adapter->vfs_lock, flags); } void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter) From e6fc5472b8500a2479bdb0a3bb0ff00c58aaf50c Mon Sep 17 00:00:00 2001 From: Wong Vee Khee Date: Fri, 15 Jul 2022 20:24:02 +0800 Subject: [PATCH 0860/1056] net: stmmac: remove redunctant disable xPCS EEE call [ Upstream commit da791bac104a3169b05b54270afe75daacba4641 ] Disable is done in stmmac_init_eee() on the event of MAC link down. Since setting enable/disable EEE via ethtool will eventually trigger a MAC down, removing this redunctant call in stmmac_ethtool.c to avoid calling xpcs_config_eee() twice. Fixes: d4aeaed80b0e ("net: stmmac: trigger PCS EEE to turn off on link down") Signed-off-by: Wong Vee Khee Link: https://lore.kernel.org/r/20220715122402.1017470-1-vee.khee.wong@linux.intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 8f563b446d5cae..dc31501fec8ffe 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -800,14 +800,6 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, netdev_warn(priv->dev, "Setting EEE tx-lpi is not supported\n"); - if (priv->hw->xpcs) { - ret = xpcs_config_eee(priv->hw->xpcs, - priv->plat->mult_fact_100ns, - edata->eee_enabled); - if (ret) - return ret; - } - if (!edata->eee_enabled) stmmac_disable_eee_mode(priv); From 9726ed46b9b7a8c4536845a294bee04910ba5aea Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Mon, 18 Jul 2022 16:31:41 +0800 Subject: [PATCH 0861/1056] gpio: pca953x: only use single read/write for No AI mode [ Upstream commit db8edaa09d7461ec08672a92a2eef63d5882bb79 ] For the device use NO AI mode(not support auto address increment), only use the single read/write when config the regmap. We meet issue on PCA9557PW on i.MX8QXP/DXL evk board, this device do not support AI mode, but when do the regmap sync, regmap will sync 3 byte data to register 1, logically this means write first data to register 1, write second data to register 2, write third data to register 3. But this device do not support AI mode, finally, these three data write only into register 1 one by one. the reault is the value of register 1 alway equal to the latest data, here is the third data, no operation happened on register 2 and register 3. This is not what we expect. Fixes: 49427232764d ("gpio: pca953x: Perform basic regmap conversion") Signed-off-by: Haibo Chen Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-pca953x.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 33683295a0bfe0..f334c8556a22e9 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -351,6 +351,9 @@ static const struct regmap_config pca953x_i2c_regmap = { .reg_bits = 8, .val_bits = 8, + .use_single_read = true, + .use_single_write = true, + .readable_reg = pca953x_readable_register, .writeable_reg = pca953x_writeable_register, .volatile_reg = pca953x_volatile_register, From dfb4b67ff4df56787694d178d0defb996f24cdfc Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Mon, 18 Jul 2022 16:31:42 +0800 Subject: [PATCH 0862/1056] gpio: pca953x: use the correct range when do regmap sync [ Upstream commit 2abc17a93867dc816f0ed9d32021dda8078e7330 ] regmap will sync a range of registers, here use the correct range to make sure the sync do not touch other unexpected registers. Find on pca9557pw on imx8qxp/dxl evk board, this device support 8 pin, so only need one register(8 bits) to cover all the 8 pins's property setting. But when sync the output, we find it actually update two registers, output register and the following register. Fixes: b76574300504 ("gpio: pca953x: Restore registers after suspend/resume cycle") Fixes: ec82d1eba346 ("gpio: pca953x: Zap ad-hoc reg_output cache") Fixes: 0f25fda840a9 ("gpio: pca953x: Zap ad-hoc reg_direction cache") Signed-off-by: Haibo Chen Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-pca953x.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index f334c8556a22e9..60b7616dd4aa9b 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -900,12 +900,12 @@ static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert) int ret; ret = regcache_sync_region(chip->regmap, chip->regs->output, - chip->regs->output + NBANK(chip)); + chip->regs->output + NBANK(chip) - 1); if (ret) goto out; ret = regcache_sync_region(chip->regmap, chip->regs->direction, - chip->regs->direction + NBANK(chip)); + chip->regs->direction + NBANK(chip) - 1); if (ret) goto out; @@ -1118,14 +1118,14 @@ static int pca953x_regcache_sync(struct device *dev) * sync these registers first and only then sync the rest. */ regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0); - ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip)); + ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync GPIO dir registers: %d\n", ret); return ret; } regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0); - ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip)); + ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync GPIO out registers: %d\n", ret); return ret; @@ -1135,7 +1135,7 @@ static int pca953x_regcache_sync(struct device *dev) if (chip->driver_data & PCA_PCAL) { regaddr = pca953x_recalc_addr(chip, PCAL953X_IN_LATCH, 0); ret = regcache_sync_region(chip->regmap, regaddr, - regaddr + NBANK(chip)); + regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync INT latch registers: %d\n", ret); @@ -1144,7 +1144,7 @@ static int pca953x_regcache_sync(struct device *dev) regaddr = pca953x_recalc_addr(chip, PCAL953X_INT_MASK, 0); ret = regcache_sync_region(chip->regmap, regaddr, - regaddr + NBANK(chip)); + regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync INT mask registers: %d\n", ret); From caae64d3e91b1ebd861bccebd08ea520ec8227e0 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Mon, 18 Jul 2022 16:31:43 +0800 Subject: [PATCH 0863/1056] gpio: pca953x: use the correct register address when regcache sync during init [ Upstream commit b8c768ccdd8338504fb78370747728d5002b1b5a ] For regcache_sync_region, we need to use pca953x_recalc_addr() to get the real register address. Fixes: ec82d1eba346 ("gpio: pca953x: Zap ad-hoc reg_output cache") Fixes: 0f25fda840a9 ("gpio: pca953x: Zap ad-hoc reg_direction cache") Signed-off-by: Haibo Chen Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-pca953x.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 60b7616dd4aa9b..64befd6f702b23 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -897,15 +897,18 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert) { DECLARE_BITMAP(val, MAX_LINE); + u8 regaddr; int ret; - ret = regcache_sync_region(chip->regmap, chip->regs->output, - chip->regs->output + NBANK(chip) - 1); + regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0); + ret = regcache_sync_region(chip->regmap, regaddr, + regaddr + NBANK(chip) - 1); if (ret) goto out; - ret = regcache_sync_region(chip->regmap, chip->regs->direction, - chip->regs->direction + NBANK(chip) - 1); + regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0); + ret = regcache_sync_region(chip->regmap, regaddr, + regaddr + NBANK(chip) - 1); if (ret) goto out; From aba8ff847f4f927ad7a1a1ee4a9f29989a1a728f Mon Sep 17 00:00:00 2001 From: Hristo Venev Date: Sat, 16 Jul 2022 11:51:34 +0300 Subject: [PATCH 0864/1056] be2net: Fix buffer overflow in be_get_module_eeprom [ Upstream commit d7241f679a59cfe27f92cb5c6272cb429fb1f7ec ] be_cmd_read_port_transceiver_data assumes that it is given a buffer that is at least PAGE_DATA_LEN long, or twice that if the module supports SFF 8472. However, this is not always the case. Fix this by passing the desired offset and length to be_cmd_read_port_transceiver_data so that we only copy the bytes once. Fixes: e36edd9d26cf ("be2net: add ethtool "-m" option support") Signed-off-by: Hristo Venev Link: https://lore.kernel.org/r/20220716085134.6095-1-hristo@venev.name Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/emulex/benet/be_cmds.c | 10 +++--- drivers/net/ethernet/emulex/benet/be_cmds.h | 2 +- .../net/ethernet/emulex/benet/be_ethtool.c | 31 ++++++++++++------- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 649c5c429bd7cf..1288b5e3d22018 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -2287,7 +2287,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state) /* Uses sync mcc */ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, - u8 page_num, u8 *data) + u8 page_num, u32 off, u32 len, u8 *data) { struct be_dma_mem cmd; struct be_mcc_wrb *wrb; @@ -2321,10 +2321,10 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, req->port = cpu_to_le32(adapter->hba_port_num); req->page_num = cpu_to_le32(page_num); status = be_mcc_notify_wait(adapter); - if (!status) { + if (!status && len > 0) { struct be_cmd_resp_port_type *resp = cmd.va; - memcpy(data, resp->page_data, PAGE_DATA_LEN); + memcpy(data, resp->page_data + off, len); } err: mutex_unlock(&adapter->mcc_lock); @@ -2415,7 +2415,7 @@ int be_cmd_query_cable_type(struct be_adapter *adapter) int status; status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - page_data); + 0, PAGE_DATA_LEN, page_data); if (!status) { switch (adapter->phy.interface_type) { case PHY_TYPE_QSFP: @@ -2440,7 +2440,7 @@ int be_cmd_query_sfp_info(struct be_adapter *adapter) int status; status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - page_data); + 0, PAGE_DATA_LEN, page_data); if (!status) { strlcpy(adapter->phy.vendor_name, page_data + SFP_VENDOR_NAME_OFFSET, SFP_VENDOR_NAME_LEN - 1); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index c30d6d6f0f3a0e..9e17d6a7ab8cde 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -2427,7 +2427,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon, int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state); int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, - u8 page_num, u8 *data); + u8 page_num, u32 off, u32 len, u8 *data); int be_cmd_query_cable_type(struct be_adapter *adapter); int be_cmd_query_sfp_info(struct be_adapter *adapter); int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index f9955308b93d62..010a0024f3ced5 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1342,7 +1342,7 @@ static int be_get_module_info(struct net_device *netdev, return -EOPNOTSUPP; status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - page_data); + 0, PAGE_DATA_LEN, page_data); if (!status) { if (!page_data[SFP_PLUS_SFF_8472_COMP]) { modinfo->type = ETH_MODULE_SFF_8079; @@ -1360,25 +1360,32 @@ static int be_get_module_eeprom(struct net_device *netdev, { struct be_adapter *adapter = netdev_priv(netdev); int status; + u32 begin, end; if (!check_privilege(adapter, MAX_PRIVILEGES)) return -EOPNOTSUPP; - status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - data); - if (status) - goto err; + begin = eeprom->offset; + end = eeprom->offset + eeprom->len; + + if (begin < PAGE_DATA_LEN) { + status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, begin, + min_t(u32, end, PAGE_DATA_LEN) - begin, + data); + if (status) + goto err; + + data += PAGE_DATA_LEN - begin; + begin = PAGE_DATA_LEN; + } - if (eeprom->offset + eeprom->len > PAGE_DATA_LEN) { - status = be_cmd_read_port_transceiver_data(adapter, - TR_PAGE_A2, - data + - PAGE_DATA_LEN); + if (end > PAGE_DATA_LEN) { + status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A2, + begin - PAGE_DATA_LEN, + end - begin, data); if (status) goto err; } - if (eeprom->offset) - memcpy(data, data + eeprom->offset, eeprom->len); err: return be_cmd_status(status); } From 25e1d782c9c3c57d7d25a5b90b11c866d1021630 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Sun, 17 Jul 2022 15:58:30 +0200 Subject: [PATCH 0865/1056] net: dsa: sja1105: silent spi_device_id warnings [ Upstream commit 855fe49984a8a3899f07ae1d149d46cd8d4acb52 ] Add spi_device_id entries to silent following warnings: SPI driver sja1105 has no spi_device_id for nxp,sja1105e SPI driver sja1105 has no spi_device_id for nxp,sja1105t SPI driver sja1105 has no spi_device_id for nxp,sja1105p SPI driver sja1105 has no spi_device_id for nxp,sja1105q SPI driver sja1105 has no spi_device_id for nxp,sja1105r SPI driver sja1105 has no spi_device_id for nxp,sja1105s SPI driver sja1105 has no spi_device_id for nxp,sja1110a SPI driver sja1105 has no spi_device_id for nxp,sja1110b SPI driver sja1105 has no spi_device_id for nxp,sja1110c SPI driver sja1105 has no spi_device_id for nxp,sja1110d Fixes: 5fa6863ba692 ("spi: Check we have a spi_device_id for each DT compatible") Signed-off-by: Oleksij Rempel Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20220717135831.2492844-1-o.rempel@pengutronix.de Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/sja1105/sja1105_main.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 924c3f129992f2..1a2a7536ff8aa8 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -3372,12 +3372,28 @@ static const struct of_device_id sja1105_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, sja1105_dt_ids); +static const struct spi_device_id sja1105_spi_ids[] = { + { "sja1105e" }, + { "sja1105t" }, + { "sja1105p" }, + { "sja1105q" }, + { "sja1105r" }, + { "sja1105s" }, + { "sja1110a" }, + { "sja1110b" }, + { "sja1110c" }, + { "sja1110d" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, sja1105_spi_ids); + static struct spi_driver sja1105_driver = { .driver = { .name = "sja1105", .owner = THIS_MODULE, .of_match_table = of_match_ptr(sja1105_dt_ids), }, + .id_table = sja1105_spi_ids, .probe = sja1105_probe, .remove = sja1105_remove, .shutdown = sja1105_shutdown, From 3d13bf301e0673bb47b2c6ef587efb441c06b928 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Sun, 17 Jul 2022 15:58:31 +0200 Subject: [PATCH 0866/1056] net: dsa: vitesse-vsc73xx: silent spi_device_id warnings [ Upstream commit 1774559f07993e1cac33c2406e99049d4bdea6c8 ] Add spi_device_id entries to silent SPI warnings. Fixes: 5fa6863ba692 ("spi: Check we have a spi_device_id for each DT compatible") Signed-off-by: Oleksij Rempel Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220717135831.2492844-2-o.rempel@pengutronix.de Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/vitesse-vsc73xx-spi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/dsa/vitesse-vsc73xx-spi.c b/drivers/net/dsa/vitesse-vsc73xx-spi.c index 645398901e05e9..922ae22fad66b3 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-spi.c +++ b/drivers/net/dsa/vitesse-vsc73xx-spi.c @@ -207,10 +207,20 @@ static const struct of_device_id vsc73xx_of_match[] = { }; MODULE_DEVICE_TABLE(of, vsc73xx_of_match); +static const struct spi_device_id vsc73xx_spi_ids[] = { + { "vsc7385" }, + { "vsc7388" }, + { "vsc7395" }, + { "vsc7398" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, vsc73xx_spi_ids); + static struct spi_driver vsc73xx_spi_driver = { .probe = vsc73xx_spi_probe, .remove = vsc73xx_spi_remove, .shutdown = vsc73xx_spi_shutdown, + .id_table = vsc73xx_spi_ids, .driver = { .name = "vsc73xx-spi", .of_match_table = vsc73xx_of_match, From 94e4b855e22bba7f0cddd4815f968c9def0badd7 Mon Sep 17 00:00:00 2001 From: Liang He Date: Thu, 14 Jul 2022 16:13:37 +0800 Subject: [PATCH 0867/1056] drm/imx/dcss: Add missing of_node_put() in fail path [ Upstream commit 02c87df2480ac855d88ee308ce3fa857d9bd55a8 ] In dcss_dev_create() and dcss_dev_destroy(), we should call of_node_put() in fail path or before the dcss's destroy as of_graph_get_port_by_id() has increased the refcount. Fixes: 9021c317b770 ("drm/imx: Add initial support for DCSS on iMX8MQ") Signed-off-by: Liang He Reviewed-by: Laurentiu Palcu Signed-off-by: Laurentiu Palcu Link: https://patchwork.freedesktop.org/patch/msgid/20220714081337.374761-1-windhl@126.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/imx/dcss/dcss-dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/imx/dcss/dcss-dev.c b/drivers/gpu/drm/imx/dcss/dcss-dev.c index c849533ca83e31..3f5750cc2673e7 100644 --- a/drivers/gpu/drm/imx/dcss/dcss-dev.c +++ b/drivers/gpu/drm/imx/dcss/dcss-dev.c @@ -207,6 +207,7 @@ struct dcss_dev *dcss_dev_create(struct device *dev, bool hdmi_output) ret = dcss_submodules_init(dcss); if (ret) { + of_node_put(dcss->of_port); dev_err(dev, "submodules initialization failed\n"); goto clks_err; } @@ -237,6 +238,8 @@ void dcss_dev_destroy(struct dcss_dev *dcss) dcss_clocks_disable(dcss); } + of_node_put(dcss->of_port); + pm_runtime_disable(dcss->dev); dcss_submodules_stop(dcss); From b8d345db03b4deffb4f04219a51d3b1e94171b76 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jul 2022 10:26:39 -0700 Subject: [PATCH 0868/1056] ipv4: Fix a data-race around sysctl_fib_multipath_use_neigh. [ Upstream commit 87507bcb4f5de16bb419e9509d874f4db6c0ad0f ] While reading sysctl_fib_multipath_use_neigh, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: a6db4494d218 ("net: ipv4: Consider failed nexthops in multipath routes") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 674694d8ac61db..55de6fa83dea28 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -2233,7 +2233,7 @@ void fib_select_multipath(struct fib_result *res, int hash) } change_nexthops(fi) { - if (net->ipv4.sysctl_fib_multipath_use_neigh) { + if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) { if (!fib_good_nh(nexthop_nh)) continue; if (!first) { From 21fb844bc1dc1461f5038d655aa1a14f39e13049 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jul 2022 10:26:40 -0700 Subject: [PATCH 0869/1056] ipv4: Fix data-races around sysctl_fib_multipath_hash_policy. [ Upstream commit 7998c12a08c97cc26660532c9f90a34bd7d8da5a ] While reading sysctl_fib_multipath_hash_policy, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: bf4e0a3db97e ("net: ipv4: add support for ECMP hash policy choice") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- net/ipv4/route.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index d17156c11ef804..6cdf0e232b1cc6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -9588,7 +9588,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, unsigned long *fields = config->fields; u32 hash_fields; - switch (net->ipv4.sysctl_fib_multipath_hash_policy) { + switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) { case 0: mlxsw_sp_mp4_hash_outer_addr(config); break; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7f08a30256c52b..ade6cb309c4060 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2048,7 +2048,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, struct flow_keys hash_keys; u32 mhash = 0; - switch (net->ipv4.sysctl_fib_multipath_hash_policy) { + switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) { case 0: memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; From 36f5b86f309b3b11295d087cd7433f1c897caf94 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jul 2022 10:26:41 -0700 Subject: [PATCH 0870/1056] ipv4: Fix data-races around sysctl_fib_multipath_hash_fields. [ Upstream commit 8895a9c2ac76fb9d3922fed4fe092c8ec5e5cccc ] While reading sysctl_fib_multipath_hash_fields, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: ce5c9c20d364 ("ipv4: Add a sysctl to control multipath hash fields") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- net/ipv4/route.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 6cdf0e232b1cc6..55de90d5ae5918 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -9606,7 +9606,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_mp_hash_inner_l3(config); break; case 3: - hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); /* Outer */ MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP); MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ade6cb309c4060..ca59b61fd3a315 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1929,7 +1929,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net, const struct sk_buff *skb, bool *p_has_inner) { - u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys keys, hash_keys; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) @@ -1958,7 +1958,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net, const struct sk_buff *skb, bool has_inner) { - u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys keys, hash_keys; /* We assume the packet carries an encapsulation, but if none was @@ -2018,7 +2018,7 @@ static u32 fib_multipath_custom_hash_skb(const struct net *net, static u32 fib_multipath_custom_hash_fl4(const struct net *net, const struct flowi4 *fl4) { - u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys hash_keys; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) From 95724fe897a4ecf2be51452ef96e818568071664 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jul 2022 10:26:42 -0700 Subject: [PATCH 0871/1056] ip: Fix data-races around sysctl_ip_prot_sock. [ Upstream commit 9b55c20f83369dd54541d9ddbe3a018a8377f451 ] sysctl_ip_prot_sock is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. Fixes: 4548b683b781 ("Introduce a sysctl that modifies the value of PROT_SOCK.") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/ip.h | 2 +- net/ipv4/sysctl_net_ipv4.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index a0ac57af82dcfd..8462ced0c21ecb 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -352,7 +352,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name) static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) { - return port < net->ipv4.sysctl_ip_prot_sock; + return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock); } #else diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ead5db7e24ea5a..a36728277e3219 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -97,7 +97,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, * port limit. */ if ((range[1] < range[0]) || - (range[0] < net->ipv4.sysctl_ip_prot_sock)) + (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock))) ret = -EINVAL; else set_local_port_range(net, range); @@ -123,7 +123,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write, .extra2 = &ip_privileged_port_max, }; - pports = net->ipv4.sysctl_ip_prot_sock; + pports = READ_ONCE(net->ipv4.sysctl_ip_prot_sock); ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); @@ -135,7 +135,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write, if (range[0] < pports) ret = -EINVAL; else - net->ipv4.sysctl_ip_prot_sock = pports; + WRITE_ONCE(net->ipv4.sysctl_ip_prot_sock, pports); } return ret; From 3f2ac2d6511bb0652abf4d7388d65bb9ff1c641c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jul 2022 10:26:43 -0700 Subject: [PATCH 0872/1056] udp: Fix a data-race around sysctl_udp_l3mdev_accept. [ Upstream commit 3d72bb4188c708bb16758c60822fc4dda7a95174 ] While reading sysctl_udp_l3mdev_accept, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 63a6fff353d0 ("net: Avoid receiving packets with an l3mdev on unbound UDP sockets") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/udp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/udp.h b/include/net/udp.h index 909ecf447e0fb2..438b1b01a56ce3 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -262,7 +262,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept, + return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); From 329de75b9e16104d5442319107ce3af961f2c96a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jul 2022 10:26:44 -0700 Subject: [PATCH 0873/1056] tcp: Fix data-races around sysctl knobs related to SYN option. [ Upstream commit 3666f666e99600518ab20982af04a078bbdad277 ] While reading these knobs, they can be changed concurrently. Thus, we need to add READ_ONCE() to their readers. - tcp_sack - tcp_window_scaling - tcp_timestamps Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 6 +++--- net/core/secure_seq.c | 4 ++-- net/ipv4/syncookies.c | 6 +++--- net/ipv4/tcp_input.c | 6 +++--- net/ipv4/tcp_output.c | 10 +++++----- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 7c760aa655404a..ddfe9208529a5e 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1236,8 +1236,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk, csk->sndbuf = newsk->sk_sndbuf; csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx; RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk), - sock_net(newsk)-> - ipv4.sysctl_tcp_window_scaling, + READ_ONCE(sock_net(newsk)-> + ipv4.sysctl_tcp_window_scaling), tp->window_clamp); neigh_release(n); inet_inherit_port(&tcp_hashinfo, lsk, newsk); @@ -1384,7 +1384,7 @@ static void chtls_pass_accept_request(struct sock *sk, #endif } if (req->tcpopt.wsf <= 14 && - sock_net(sk)->ipv4.sysctl_tcp_window_scaling) { + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { inet_rsk(oreq)->wscale_ok = 1; inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf; } diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 7131cd1fb2ad52..189eea1372d5db 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -64,7 +64,7 @@ u32 secure_tcpv6_ts_off(const struct net *net, .daddr = *(struct in6_addr *)daddr, }; - if (net->ipv4.sysctl_tcp_timestamps != 1) + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); @@ -120,7 +120,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #ifdef CONFIG_INET u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr) { - if (net->ipv4.sysctl_tcp_timestamps != 1) + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 8eee771d2acacc..9408392640250e 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -249,12 +249,12 @@ bool cookie_timestamp_decode(const struct net *net, return true; } - if (!net->ipv4.sysctl_tcp_timestamps) + if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps)) return false; tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0; - if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack) + if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack)) return false; if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK) @@ -263,7 +263,7 @@ bool cookie_timestamp_decode(const struct net *net, tcp_opt->wscale_ok = 1; tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK; - return net->ipv4.sysctl_tcp_window_scaling != 0; + return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0; } EXPORT_SYMBOL(cookie_timestamp_decode); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d9e534c6fd0cc8..dd10a317709f77 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4053,7 +4053,7 @@ void tcp_parse_options(const struct net *net, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && - !estab && net->ipv4.sysctl_tcp_window_scaling) { + !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > TCP_MAX_WSCALE) { @@ -4069,7 +4069,7 @@ void tcp_parse_options(const struct net *net, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || - (!estab && net->ipv4.sysctl_tcp_timestamps))) { + (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); @@ -4077,7 +4077,7 @@ void tcp_parse_options(const struct net *net, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && - !estab && net->ipv4.sysctl_tcp_sack) { + !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) { opt_rx->sack_ok = TCP_SACK_SEEN; tcp_sack_reset(opt_rx); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fcccf56ae9f79b..a08fcf15372ad9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -790,18 +790,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) { opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } - if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; remaining -= TCPOLEN_WSCALE_ALIGNED; } - if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) remaining -= TCPOLEN_SACKPERM_ALIGNED; @@ -3649,7 +3649,7 @@ static void tcp_connect_init(struct sock *sk) * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ tp->tcp_header_len = sizeof(struct tcphdr); - if (sock_net(sk)->ipv4.sysctl_tcp_timestamps) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps)) tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; #ifdef CONFIG_TCP_MD5SIG @@ -3685,7 +3685,7 @@ static void tcp_connect_init(struct sock *sk) tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - sock_net(sk)->ipv4.sysctl_tcp_window_scaling, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling), &rcv_wscale, rcv_wnd); From 5037ca9e4b169cc9aed0174d658c3d81fdaf8ea5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jul 2022 10:26:45 -0700 Subject: [PATCH 0874/1056] tcp: Fix a data-race around sysctl_tcp_early_retrans. [ Upstream commit 52e65865deb6a36718a463030500f16530eaab74 ] While reading sysctl_tcp_early_retrans, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: eed530b6c676 ("tcp: early retransmit") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a08fcf15372ad9..3b71d873599543 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2739,7 +2739,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) if (rcu_access_pointer(tp->fastopen_rsk)) return false; - early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans; + early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans); /* Schedule a loss probe in 2*RTT for SACK capable connections * not in loss recovery, that are either limited by cwnd or application. */ From a31e2d0cb5cfa2aae3144cac04f25031d5d20fb4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jul 2022 10:26:46 -0700 Subject: [PATCH 0875/1056] tcp: Fix data-races around sysctl_tcp_recovery. [ Upstream commit e7d2ef837e14a971a05f60ea08c47f3fed1a36e4 ] While reading sysctl_tcp_recovery, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 4f41b1c58a32 ("tcp: use RACK to detect losses") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 3 ++- net/ipv4/tcp_recovery.c | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dd10a317709f77..1cc0aca39c04d4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2087,7 +2087,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp) static bool tcp_is_rack(const struct sock *sk) { - return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION; + return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_LOSS_DETECTION; } /* If we detect SACK reneging, forget all SACK information diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index fd113f6226efca..ac14216f6204f7 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -19,7 +19,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk) return 0; if (tp->sacked_out >= tp->reordering && - !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH)) + !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_NO_DUPTHRESH)) return 0; } @@ -192,7 +193,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); - if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND || + if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_STATIC_REO_WND) || !rs->prior_delivered) return; From 404c53ccdebd11f96954f4070cffac8e0b4d5cb6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jul 2022 10:26:47 -0700 Subject: [PATCH 0876/1056] tcp: Fix a data-race around sysctl_tcp_thin_linear_timeouts. [ Upstream commit 7c6f2a86ca590d5187a073d987e9599985fb1c7c ] While reading sysctl_tcp_thin_linear_timeouts, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 36e31b0af587 ("net: TCP thin linear timeouts") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ec5277becc6a1f..50bba370486e83 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -578,7 +578,7 @@ void tcp_retransmit_timer(struct sock *sk) * linear-timeout retransmissions into a black hole */ if (sk->sk_state == TCP_ESTABLISHED && - (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) && + (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) && tcp_stream_is_thin(tp) && icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { icsk->icsk_backoff = 0; From 41aeba4506f6b70ec7500c6fe202731a4ba29fe5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jul 2022 10:26:48 -0700 Subject: [PATCH 0877/1056] tcp: Fix data-races around sysctl_tcp_slow_start_after_idle. [ Upstream commit 4845b5713ab18a1bb6e31d1fbb4d600240b8b691 ] While reading sysctl_tcp_slow_start_after_idle, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 35089bb203f4 ("[TCP]: Add tcp_slow_start_after_idle sysctl.") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/tcp.h | 4 ++-- net/ipv4/tcp_output.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 0c609d10c32046..8ce8aafeef0f55 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1390,8 +1390,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); s32 delta; - if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out || - ca_ops->cong_control) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) || + tp->packets_out || ca_ops->cong_control) return; delta = tcp_jiffies32 - tp->lsndtime; if (delta > inet_csk(sk)->icsk_rto) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3b71d873599543..94f7841f7bfb19 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1901,7 +1901,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; - if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) && (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && !ca_ops->cong_control) tcp_cwnd_application_limited(sk); From 1a1aedbb7605314f4210174988fde8c99c25bb07 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jul 2022 10:26:49 -0700 Subject: [PATCH 0878/1056] tcp: Fix a data-race around sysctl_tcp_retrans_collapse. [ Upstream commit 1a63cb91f0c2fcdeced6d6edee8d1d886583d139 ] While reading sysctl_tcp_retrans_collapse, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 94f7841f7bfb19..caf9283f9b0f94 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3108,7 +3108,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, struct sk_buff *skb = to, *tmp; bool first = true; - if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)) return; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) return; From d46d55e3f9f8051585fada583fc92d7826d0731a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jul 2022 10:26:50 -0700 Subject: [PATCH 0879/1056] tcp: Fix a data-race around sysctl_tcp_stdurg. [ Upstream commit 4e08ed41cb1194009fc1a916a59ce3ed4afd77cd ] While reading sysctl_tcp_stdurg, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1cc0aca39c04d4..6309a4eb3acd33 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5542,7 +5542,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) struct tcp_sock *tp = tcp_sk(sk); u32 ptr = ntohs(th->urg_ptr); - if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg) + if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg)) ptr--; ptr += ntohl(th->seq); From 46d05dab1b4f99bca838a85811f321b9997ac957 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jul 2022 10:26:51 -0700 Subject: [PATCH 0880/1056] tcp: Fix a data-race around sysctl_tcp_rfc1337. [ Upstream commit 0b484c91911e758e53656d570de58c2ed81ec6f2 ] While reading sysctl_tcp_rfc1337, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_minisocks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 13783fc58e030a..39d8101e8f99a0 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -180,7 +180,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * Oh well... nobody has a sufficient solution to this * protocol bug yet. */ - if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) { + if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) { kill: inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; From 84cee470f77cb89d59b1c3f28ab2c7c4971ec473 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jul 2022 10:26:52 -0700 Subject: [PATCH 0881/1056] tcp: Fix a data-race around sysctl_tcp_abort_on_overflow. [ Upstream commit 2d17d9c7382327d00aeaea35af44e9b26d53206e ] While reading sysctl_tcp_abort_on_overflow, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_minisocks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 39d8101e8f99a0..41368e77fbb81b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -789,7 +789,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (sk != req->rsk_listener) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); - if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) { + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow)) { inet_rsk(req)->acked = 1; return NULL; } From ce3731c61589ed73364a5b55ce34131762ef9b60 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jul 2022 10:26:53 -0700 Subject: [PATCH 0882/1056] tcp: Fix data-races around sysctl_tcp_max_reordering. [ Upstream commit a11e5b3e7a59fde1a90b0eaeaa82320495cf8cae ] While reading sysctl_tcp_max_reordering, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: dca145ffaa8d ("tcp: allow for bigger reordering level") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6309a4eb3acd33..2d21d8bf3b8cc0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1043,7 +1043,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, tp->undo_marker ? tp->undo_retrans : 0); #endif tp->reordering = min_t(u32, (metric + mss - 1) / mss, - sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); } /* This exciting event is worth to be remembered. 8) */ @@ -2022,7 +2022,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) return; tp->reordering = min_t(u32, tp->packets_out + addend, - sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); tp->reord_seen++; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); } From e129e5486b981d324057e6986059f852658b0d00 Mon Sep 17 00:00:00 2001 From: Srinivas Neeli Date: Thu, 21 Jul 2022 13:09:09 +0530 Subject: [PATCH 0883/1056] gpio: gpio-xilinx: Fix integer overflow [ Upstream commit 32c094a09d5829ad9b02cdf667569aefa8de0ea6 ] Current implementation is not able to configure more than 32 pins due to incorrect data type. So type casting with unsigned long to avoid it. Fixes: 02b3f84d9080 ("xilinx: Switch to use bitmap APIs") Signed-off-by: Srinivas Neeli Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-xilinx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c index a1b66338d077da..db616ae560a3ca 100644 --- a/drivers/gpio/gpio-xilinx.c +++ b/drivers/gpio/gpio-xilinx.c @@ -99,7 +99,7 @@ static inline void xgpio_set_value32(unsigned long *map, int bit, u32 v) const unsigned long offset = (bit % BITS_PER_LONG) & BIT(5); map[index] &= ~(0xFFFFFFFFul << offset); - map[index] |= v << offset; + map[index] |= (unsigned long)v << offset; } static inline int xgpio_regoffset(struct xgpio_instance *chip, int ch) From cd9f96aa8f57a8377bfe27ecf887464365ff4aa5 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 19 Jul 2022 10:08:30 +0800 Subject: [PATCH 0884/1056] KVM: selftests: Fix target thread to be migrated in rseq_test commit e923b0537d28e15c9d31ce8b38f810b325816903 upstream. In rseq_test, there are two threads, which are vCPU thread and migration worker separately. Unfortunately, the test has the wrong PID passed to sched_setaffinity() in the migration worker. It forces migration on the migration worker because zeroed PID represents the calling thread, which is the migration worker itself. It means the vCPU thread is never enforced to migration and it can migrate at any time, which eventually leads to failure as the following logs show. host# uname -r 5.19.0-rc6-gavin+ host# # cat /proc/cpuinfo | grep processor | tail -n 1 processor : 223 host# pwd /home/gavin/sandbox/linux.main/tools/testing/selftests/kvm host# for i in `seq 1 100`; do \ echo "--------> $i"; ./rseq_test; done --------> 1 --------> 2 --------> 3 --------> 4 --------> 5 --------> 6 ==== Test Assertion Failure ==== rseq_test.c:265: rseq_cpu == cpu pid=3925 tid=3925 errno=4 - Interrupted system call 1 0x0000000000401963: main at rseq_test.c:265 (discriminator 2) 2 0x0000ffffb044affb: ?? ??:0 3 0x0000ffffb044b0c7: ?? ??:0 4 0x0000000000401a6f: _start at ??:? rseq CPU = 4, sched CPU = 27 Fix the issue by passing correct parameter, TID of the vCPU thread, to sched_setaffinity() in the migration worker. Fixes: 61e52f1630f5 ("KVM: selftests: Add a test for KVM_RUN+rseq to detect task migration bugs") Suggested-by: Sean Christopherson Signed-off-by: Gavin Shan Reviewed-by: Oliver Upton Message-Id: <20220719020830.3479482-1-gshan@redhat.com> Reviewed-by: Andrew Jones Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/kvm/rseq_test.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index 4158da0da2bba8..2237d1aac80141 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -82,8 +82,9 @@ static int next_cpu(int cpu) return cpu; } -static void *migration_worker(void *ign) +static void *migration_worker(void *__rseq_tid) { + pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid; cpu_set_t allowed_mask; int r, i, cpu; @@ -106,7 +107,7 @@ static void *migration_worker(void *ign) * stable, i.e. while changing affinity is in-progress. */ smp_wmb(); - r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask); + r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask); TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)", errno, strerror(errno)); smp_wmb(); @@ -231,7 +232,8 @@ int main(int argc, char *argv[]) vm = vm_create_default(VCPU_ID, 0, guest_code); ucall_init(vm, NULL); - pthread_create(&migration_thread, NULL, migration_worker, 0); + pthread_create(&migration_thread, NULL, migration_worker, + (void *)(unsigned long)gettid()); for (i = 0; !done; i++) { vcpu_run(vm, VCPU_ID); From 58466e05390043d2805685c70f55f3f59711bdf2 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 19 Jul 2022 09:22:35 +0200 Subject: [PATCH 0885/1056] spi: bcm2835: bcm2835_spi_handle_err(): fix NULL pointer deref for non DMA transfers commit 4ceaa684459d414992acbefb4e4c31f2dfc50641 upstream. In case a IRQ based transfer times out the bcm2835_spi_handle_err() function is called. Since commit 1513ceee70f2 ("spi: bcm2835: Drop dma_pending flag") the TX and RX DMA transfers are unconditionally canceled, leading to NULL pointer derefs if ctlr->dma_tx or ctlr->dma_rx are not set. Fix the NULL pointer deref by checking that ctlr->dma_tx and ctlr->dma_rx are valid pointers before accessing them. Fixes: 1513ceee70f2 ("spi: bcm2835: Drop dma_pending flag") Cc: Lukas Wunner Signed-off-by: Marc Kleine-Budde Link: https://lore.kernel.org/r/20220719072234.2782764-1-mkl@pengutronix.de Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-bcm2835.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index 775c0bf2f923d4..0933948d7df3d8 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -1138,10 +1138,14 @@ static void bcm2835_spi_handle_err(struct spi_controller *ctlr, struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr); /* if an error occurred and we have an active dma, then terminate */ - dmaengine_terminate_sync(ctlr->dma_tx); - bs->tx_dma_active = false; - dmaengine_terminate_sync(ctlr->dma_rx); - bs->rx_dma_active = false; + if (ctlr->dma_tx) { + dmaengine_terminate_sync(ctlr->dma_tx); + bs->tx_dma_active = false; + } + if (ctlr->dma_rx) { + dmaengine_terminate_sync(ctlr->dma_rx); + bs->rx_dma_active = false; + } bcm2835_spi_undo_prologue(bs); /* and reset */ From e91665fbbf3ccb268b268a7d71a6513538d813ac Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 1 Jun 2022 03:43:28 +0200 Subject: [PATCH 0886/1056] KVM: Don't null dereference ops->destroy commit e8bc2427018826e02add7b0ed0fc625a60390ae5 upstream. A KVM device cleanup happens in either of two callbacks: 1) destroy() which is called when the VM is being destroyed; 2) release() which is called when a device fd is closed. Most KVM devices use 1) but Book3s's interrupt controller KVM devices (XICS, XIVE, XIVE-native) use 2) as they need to close and reopen during the machine execution. The error handling in kvm_ioctl_create_device() assumes destroy() is always defined which leads to NULL dereference as discovered by Syzkaller. This adds a checks for destroy!=NULL and adds a missing release(). This is not changing kvm_destroy_devices() as devices with defined release() should have been removed from the KVM devices list by then. Suggested-by: Paolo Bonzini Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/kvm_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9eac68ae291e3d..0816b8018cde29 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4172,8 +4172,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, kvm_put_kvm_no_destroy(kvm); mutex_lock(&kvm->lock); list_del(&dev->vm_node); + if (ops->release) + ops->release(dev); mutex_unlock(&kvm->lock); - ops->destroy(dev); + if (ops->destroy) + ops->destroy(dev); return ret; } From 8c5429a04ccd8dbcc3c753dab2f4126774ec28d4 Mon Sep 17 00:00:00 2001 From: Wang Cheng Date: Thu, 19 May 2022 14:08:54 -0700 Subject: [PATCH 0887/1056] mm/mempolicy: fix uninit-value in mpol_rebind_policy() commit 018160ad314d75b1409129b2247b614a9f35894c upstream. mpol_set_nodemask()(mm/mempolicy.c) does not set up nodemask when pol->mode is MPOL_LOCAL. Check pol->mode before access pol->w.cpuset_mems_allowed in mpol_rebind_policy()(mm/mempolicy.c). BUG: KMSAN: uninit-value in mpol_rebind_policy mm/mempolicy.c:352 [inline] BUG: KMSAN: uninit-value in mpol_rebind_task+0x2ac/0x2c0 mm/mempolicy.c:368 mpol_rebind_policy mm/mempolicy.c:352 [inline] mpol_rebind_task+0x2ac/0x2c0 mm/mempolicy.c:368 cpuset_change_task_nodemask kernel/cgroup/cpuset.c:1711 [inline] cpuset_attach+0x787/0x15e0 kernel/cgroup/cpuset.c:2278 cgroup_migrate_execute+0x1023/0x1d20 kernel/cgroup/cgroup.c:2515 cgroup_migrate kernel/cgroup/cgroup.c:2771 [inline] cgroup_attach_task+0x540/0x8b0 kernel/cgroup/cgroup.c:2804 __cgroup1_procs_write+0x5cc/0x7a0 kernel/cgroup/cgroup-v1.c:520 cgroup1_tasks_write+0x94/0xb0 kernel/cgroup/cgroup-v1.c:539 cgroup_file_write+0x4c2/0x9e0 kernel/cgroup/cgroup.c:3852 kernfs_fop_write_iter+0x66a/0x9f0 fs/kernfs/file.c:296 call_write_iter include/linux/fs.h:2162 [inline] new_sync_write fs/read_write.c:503 [inline] vfs_write+0x1318/0x2030 fs/read_write.c:590 ksys_write+0x28b/0x510 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0xdb/0x120 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Uninit was created at: slab_post_alloc_hook mm/slab.h:524 [inline] slab_alloc_node mm/slub.c:3251 [inline] slab_alloc mm/slub.c:3259 [inline] kmem_cache_alloc+0x902/0x11c0 mm/slub.c:3264 mpol_new mm/mempolicy.c:293 [inline] do_set_mempolicy+0x421/0xb70 mm/mempolicy.c:853 kernel_set_mempolicy mm/mempolicy.c:1504 [inline] __do_sys_set_mempolicy mm/mempolicy.c:1510 [inline] __se_sys_set_mempolicy+0x44c/0xb60 mm/mempolicy.c:1507 __x64_sys_set_mempolicy+0xd8/0x110 mm/mempolicy.c:1507 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae KMSAN: uninit-value in mpol_rebind_task (2) https://syzkaller.appspot.com/bug?id=d6eb90f952c2a5de9ea718a1b873c55cb13b59dc This patch seems to fix below bug too. KMSAN: uninit-value in mpol_rebind_mm (2) https://syzkaller.appspot.com/bug?id=f2fecd0d7013f54ec4162f60743a2b28df40926b The uninit-value is pol->w.cpuset_mems_allowed in mpol_rebind_policy(). When syzkaller reproducer runs to the beginning of mpol_new(), mpol_new() mm/mempolicy.c do_mbind() mm/mempolicy.c kernel_mbind() mm/mempolicy.c `mode` is 1(MPOL_PREFERRED), nodes_empty(*nodes) is `true` and `flags` is 0. Then mode = MPOL_LOCAL; ... policy->mode = mode; policy->flags = flags; will be executed. So in mpol_set_nodemask(), mpol_set_nodemask() mm/mempolicy.c do_mbind() kernel_mbind() pol->mode is 4 (MPOL_LOCAL), that `nodemask` in `pol` is not initialized, which will be accessed in mpol_rebind_policy(). Link: https://lkml.kernel.org/r/20220512123428.fq3wofedp6oiotd4@ppc.localdomain Signed-off-by: Wang Cheng Reported-by: Tested-by: Cc: David Rientjes Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e75872035c7685..9db0158155e1ad 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -347,7 +347,7 @@ static void mpol_rebind_preferred(struct mempolicy *pol, */ static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask) { - if (!pol) + if (!pol || pol->mode == MPOL_LOCAL) return; if (!mpol_store_user_nodemask(pol) && nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) From bc1fb3c53afd3372042a8be728937c21bc3b735a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Jul 2022 12:39:00 +0000 Subject: [PATCH 0888/1056] bpf: Make sure mac_header was set before using it commit 0326195f523a549e0a9d7fd44c70b26fd7265090 upstream. Classic BPF has a way to load bytes starting from the mac header. Some skbs do not have a mac header, and skb_mac_header() in this case is returning a pointer that 65535 bytes after skb->head. Existing range check in bpf_internal_load_pointer_neg_helper() was properly kicking and no illegal access was happening. New sanity check in skb_mac_header() is firing, so we need to avoid it. WARNING: CPU: 1 PID: 28990 at include/linux/skbuff.h:2785 skb_mac_header include/linux/skbuff.h:2785 [inline] WARNING: CPU: 1 PID: 28990 at include/linux/skbuff.h:2785 bpf_internal_load_pointer_neg_helper+0x1b1/0x1c0 kernel/bpf/core.c:74 Modules linked in: CPU: 1 PID: 28990 Comm: syz-executor.0 Not tainted 5.19.0-rc4-syzkaller-00865-g4874fb9484be #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/29/2022 RIP: 0010:skb_mac_header include/linux/skbuff.h:2785 [inline] RIP: 0010:bpf_internal_load_pointer_neg_helper+0x1b1/0x1c0 kernel/bpf/core.c:74 Code: ff ff 45 31 f6 e9 5a ff ff ff e8 aa 27 40 00 e9 3b ff ff ff e8 90 27 40 00 e9 df fe ff ff e8 86 27 40 00 eb 9e e8 2f 2c f3 ff <0f> 0b eb b1 e8 96 27 40 00 e9 79 fe ff ff 90 41 57 41 56 41 55 41 RSP: 0018:ffffc9000309f668 EFLAGS: 00010216 RAX: 0000000000000118 RBX: ffffffffffeff00c RCX: ffffc9000e417000 RDX: 0000000000040000 RSI: ffffffff81873f21 RDI: 0000000000000003 RBP: ffff8880842878c0 R08: 0000000000000003 R09: 000000000000ffff R10: 000000000000ffff R11: 0000000000000001 R12: 0000000000000004 R13: ffff88803ac56c00 R14: 000000000000ffff R15: dffffc0000000000 FS: 00007f5c88a16700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fdaa9f6c058 CR3: 000000003a82c000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ____bpf_skb_load_helper_32 net/core/filter.c:276 [inline] bpf_skb_load_helper_32+0x191/0x220 net/core/filter.c:264 Fixes: f9aefd6b2aa3 ("net: warn if mac header was not set") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220707123900.945305-1-edumazet@google.com Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 15946c11524e70..4ce500eac2ef24 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -66,11 +66,13 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns { u8 *ptr = NULL; - if (k >= SKF_NET_OFF) + if (k >= SKF_NET_OFF) { ptr = skb_network_header(skb) + k - SKF_NET_OFF; - else if (k >= SKF_LL_OFF) + } else if (k >= SKF_LL_OFF) { + if (unlikely(!skb_mac_header_was_set(skb))) + return NULL; ptr = skb_mac_header(skb) + k - SKF_LL_OFF; - + } if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) return ptr; From 46d5575df8a12baf8fbe947ef7980a39351313e8 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Thu, 14 Jul 2022 17:19:08 +0200 Subject: [PATCH 0889/1056] sched/deadline: Fix BUG_ON condition for deboosted tasks commit ddfc710395cccc61247348df9eb18ea50321cbed upstream. Tasks the are being deboosted from SCHED_DEADLINE might enter enqueue_task_dl() one last time and hit an erroneous BUG_ON condition: since they are not boosted anymore, the if (is_dl_boosted()) branch is not taken, but the else if (!dl_prio) is and inside this one we BUG_ON(!is_dl_boosted), which is of course false (BUG_ON triggered) otherwise we had entered the if branch above. Long story short, the current condition doesn't make sense and always leads to triggering of a BUG. Fix this by only checking enqueue flags, properly: ENQUEUE_REPLENISH has to be present, but additional flags are not a problem. Fixes: 64be6f1f5f71 ("sched/deadline: Don't replenish from a !SCHED_DEADLINE entity") Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20220714151908.533052-1-juri.lelli@redhat.com Signed-off-by: Greg Kroah-Hartman --- kernel/sched/deadline.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index fffcb1aa77b7d5..ee673a205e2202 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1561,7 +1561,10 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) * the throttle. */ p->dl.dl_throttled = 0; - BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH); + if (!(flags & ENQUEUE_REPLENISH)) + printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n", + task_pid_nr(p)); + return; } From 765f802e0d25e19278a0bdc98e985bb2e523451b Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 14 Jul 2022 16:15:35 -0700 Subject: [PATCH 0890/1056] x86/bugs: Warn when "ibrs" mitigation is selected on Enhanced IBRS parts commit eb23b5ef9131e6d65011de349a4d25ef1b3d4314 upstream. IBRS mitigation for spectre_v2 forces write to MSR_IA32_SPEC_CTRL at every kernel entry/exit. On Enhanced IBRS parts setting MSR_IA32_SPEC_CTRL[IBRS] only once at boot is sufficient. MSR writes at every kernel entry/exit incur unnecessary performance loss. When Enhanced IBRS feature is present, print a warning about this unnecessary performance loss. Signed-off-by: Pawan Gupta Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thadeu Lima de Souza Cascardo Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/2a5eaf54583c2bfe0edc4fea64006656256cca17.1657814857.git.pawan.kumar.gupta@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 650333fce795f9..752941879792f0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -968,6 +968,7 @@ static inline const char *spectre_v2_module_string(void) { return ""; } #define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n" #define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" #define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n" +#define SPECTRE_V2_IBRS_PERF_MSG "WARNING: IBRS mitigation selected on Enhanced IBRS CPU, this may cause unnecessary performance loss\n" #ifdef CONFIG_BPF_SYSCALL void unpriv_ebpf_notify(int new_state) @@ -1408,6 +1409,8 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_IBRS: setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS); + if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) + pr_warn(SPECTRE_V2_IBRS_PERF_MSG); break; case SPECTRE_V2_LFENCE: From 1062cfb47e565792b6fd72d7e36b0f7b542ec78a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 6 Apr 2022 13:34:16 -0400 Subject: [PATCH 0891/1056] dlm: fix pending remove if msg allocation fails [ Upstream commit ba58995909b5098ca4003af65b0ccd5a8d13dd25 ] This patch unsets ls_remove_len and ls_remove_name if a message allocation of a remove messages fails. In this case we never send a remove message out but set the per ls ls_remove_len ls_remove_name variable for a pending remove. Unset those variable should indicate possible waiters in wait_pending_remove() that no pending remove is going on at this moment. Cc: stable@vger.kernel.org Signed-off-by: Alexander Aring Signed-off-by: David Teigland Signed-off-by: Sasha Levin --- fs/dlm/lock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index bb9e85e8819ce2..9f93496d2cc949 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -4065,13 +4065,14 @@ static void send_repeat_remove(struct dlm_ls *ls, char *ms_name, int len) rv = _create_message(ls, sizeof(struct dlm_message) + len, dir_nodeid, DLM_MSG_REMOVE, &ms, &mh); if (rv) - return; + goto out; memcpy(ms->m_extra, name, len); ms->m_hash = hash; send_message(mh, ms); +out: spin_lock(&ls->ls_remove_spin); ls->ls_remove_len = 0; memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN); From 823424905d03eefdaafa4c194ca775999998b2ba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Feb 2022 00:49:42 +0000 Subject: [PATCH 0892/1056] x86/uaccess: Implement macros for CMPXCHG on user addresses [ Upstream commit 989b5db215a2f22f89d730b607b071d964780f10 ] Add support for CMPXCHG loops on userspace addresses. Provide both an "unsafe" version for tight loops that do their own uaccess begin/end, as well as a "safe" version for use cases where the CMPXCHG is not buried in a loop, e.g. KVM will resume the guest instead of looping when emulation of a guest atomic accesses fails the CMPXCHG. Provide 8-byte versions for 32-bit kernels so that KVM can do CMPXCHG on guest PAE PTEs, which are accessed via userspace addresses. Guard the asm_volatile_goto() variation with CC_HAS_ASM_GOTO_TIED_OUTPUT, the "+m" constraint fails on some compilers that otherwise support CC_HAS_ASM_GOTO_OUTPUT. Cc: stable@vger.kernel.org Signed-off-by: Peter Zijlstra (Intel) Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Message-Id: <20220202004945.2540433-3-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/include/asm/uaccess.h | 142 +++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index bb1430283c726c..2f4c9c168b11d8 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -414,6 +414,103 @@ do { \ #endif // CONFIG_CC_ASM_GOTO_OUTPUT +#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT +#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm_volatile_goto("\n" \ + "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ + _ASM_EXTABLE_UA(1b, %l[label]) \ + : CC_OUT(z) (success), \ + [ptr] "+m" (*_ptr), \ + [old] "+a" (__old) \ + : [new] ltype (__new) \ + : "memory" \ + : label); \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) + +#ifdef CONFIG_X86_32 +#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm_volatile_goto("\n" \ + "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + _ASM_EXTABLE_UA(1b, %l[label]) \ + : CC_OUT(z) (success), \ + "+A" (__old), \ + [ptr] "+m" (*_ptr) \ + : "b" ((u32)__new), \ + "c" ((u32)((u64)__new >> 32)) \ + : "memory" \ + : label); \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) +#endif // CONFIG_X86_32 +#else // !CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT +#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ + int __err = 0; \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm volatile("\n" \ + "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ + CC_SET(z) \ + "2:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ + %[errout]) \ + : CC_OUT(z) (success), \ + [errout] "+r" (__err), \ + [ptr] "+m" (*_ptr), \ + [old] "+a" (__old) \ + : [new] ltype (__new) \ + : "memory", "cc"); \ + if (unlikely(__err)) \ + goto label; \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) + +#ifdef CONFIG_X86_32 +/* + * Unlike the normal CMPXCHG, hardcode ECX for both success/fail and error. + * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are + * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses + * both ESI and EDI for the memory operand, compilation will fail if the error + * is an input+output as there will be no register available for input. + */ +#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ + int __result; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm volatile("\n" \ + "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + "mov $0, %%ecx\n\t" \ + "setz %%cl\n" \ + "2:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %%ecx) \ + : [result]"=c" (__result), \ + "+A" (__old), \ + [ptr] "+m" (*_ptr) \ + : "b" ((u32)__new), \ + "c" ((u32)((u64)__new >> 32)) \ + : "memory", "cc"); \ + if (unlikely(__result < 0)) \ + goto label; \ + if (unlikely(!__result)) \ + *_old = __old; \ + likely(__result); }) +#endif // CONFIG_X86_32 +#endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT + /* FIXME: this hack is definitely wrong -AK */ struct __large_struct { unsigned long buf[100]; }; #define __m(x) (*(struct __large_struct __user *)(x)) @@ -506,6 +603,51 @@ do { \ } while (0) #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT +extern void __try_cmpxchg_user_wrong_size(void); + +#ifndef CONFIG_X86_32 +#define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \ + __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label) +#endif + +/* + * Force the pointer to u to match the size expected by the asm helper. + * clang/LLVM compiles all cases and only discards the unused paths after + * processing errors, which breaks i386 if the pointer is an 8-byte value. + */ +#define unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ + bool __ret; \ + __chk_user_ptr(_ptr); \ + switch (sizeof(*(_ptr))) { \ + case 1: __ret = __try_cmpxchg_user_asm("b", "q", \ + (__force u8 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 2: __ret = __try_cmpxchg_user_asm("w", "r", \ + (__force u16 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 4: __ret = __try_cmpxchg_user_asm("l", "r", \ + (__force u32 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\ + (_nval), _label); \ + break; \ + default: __try_cmpxchg_user_wrong_size(); \ + } \ + __ret; }) + +/* "Returns" 0 on success, 1 on failure, -EFAULT if the access faults. */ +#define __try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ + int __ret = -EFAULT; \ + __uaccess_begin_nospec(); \ + __ret = !unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label); \ +_label: \ + __uaccess_end(); \ + __ret; \ + }) + /* * We want the unsafe accessors to always be inlined and use * the error labels - thus the macro games. From d3135da29e1ad79cdee3a6c3686a4f4d3942e87f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:12 +0200 Subject: [PATCH 0893/1056] x86/extable: Tidy up redundant handler functions [ Upstream commit 326b567f82df0c4c8f50092b9af9a3014616fb3c ] No need to have the same code all over the place. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132524.963232825@linutronix.de Signed-off-by: Sasha Levin --- arch/x86/mm/extable.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index e1664e9f969c30..d9a1046f3a98ee 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -39,9 +39,8 @@ __visible bool ex_handler_fault(const struct exception_table_entry *fixup, unsigned long error_code, unsigned long fault_addr) { - regs->ip = ex_fixup_addr(fixup); regs->ax = trapnr; - return true; + return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); } EXPORT_SYMBOL_GPL(ex_handler_fault); @@ -76,8 +75,7 @@ __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup, unsigned long fault_addr) { WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?"); - regs->ip = ex_fixup_addr(fixup); - return true; + return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); } EXPORT_SYMBOL(ex_handler_uaccess); @@ -87,9 +85,7 @@ __visible bool ex_handler_copy(const struct exception_table_entry *fixup, unsigned long fault_addr) { WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?"); - regs->ip = ex_fixup_addr(fixup); - regs->ax = trapnr; - return true; + return ex_handler_fault(fixup, regs, trapnr, error_code, fault_addr); } EXPORT_SYMBOL(ex_handler_copy); @@ -103,10 +99,9 @@ __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup show_stack_regs(regs); /* Pretend that the read succeeded and returned 0. */ - regs->ip = ex_fixup_addr(fixup); regs->ax = 0; regs->dx = 0; - return true; + return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); } EXPORT_SYMBOL(ex_handler_rdmsr_unsafe); @@ -121,8 +116,7 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup show_stack_regs(regs); /* Pretend that the write succeeded. */ - regs->ip = ex_fixup_addr(fixup); - return true; + return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); } EXPORT_SYMBOL(ex_handler_wrmsr_unsafe); From 3e6392ba3dceb8fef2db9d3095d503531ded2a0a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:13 +0200 Subject: [PATCH 0894/1056] x86/extable: Get rid of redundant macros [ Upstream commit 32fd8b59f91fcd3bf9459aa72d90345735cc2588 ] No point in defining the identical macros twice depending on C or assembly mode. They are still identical. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.023659534@linutronix.de Signed-off-by: Sasha Levin --- arch/x86/include/asm/asm.h | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 3ad3da9a7d9745..719955e658a215 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -132,18 +132,6 @@ .long (handler) - . ; \ .popsection -# define _ASM_EXTABLE(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) - -# define _ASM_EXTABLE_UA(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) - -# define _ASM_EXTABLE_CPY(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy) - -# define _ASM_EXTABLE_FAULT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) - # ifdef CONFIG_KPROBES # define _ASM_NOKPROBE(entry) \ .pushsection "_kprobe_blacklist","aw" ; \ @@ -164,18 +152,6 @@ " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \ " .popsection\n" -# define _ASM_EXTABLE(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) - -# define _ASM_EXTABLE_UA(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) - -# define _ASM_EXTABLE_CPY(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy) - -# define _ASM_EXTABLE_FAULT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) - /* For C file, we already have NOKPROBE_SYMBOL macro */ /* @@ -188,6 +164,18 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif /* __ASSEMBLY__ */ +#define _ASM_EXTABLE(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) + +#define _ASM_EXTABLE_UA(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) + +#define _ASM_EXTABLE_CPY(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy) + +#define _ASM_EXTABLE_FAULT(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) + #endif /* __KERNEL__ */ #endif /* _ASM_X86_ASM_H */ From ad2d98ea72d247fe8ca6f79de69ca287952c7770 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:15 +0200 Subject: [PATCH 0895/1056] x86/mce: Deduplicate exception handling [ Upstream commit e42404afc4ca856c48f1e05752541faa3587c472 ] Prepare code for further simplification. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.096452100@linutronix.de Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/mce/core.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 848cfb013f58c2..d8da3acf1ffdf3 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -382,13 +382,16 @@ static int msr_to_offset(u32 msr) return -1; } -__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) { - pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", - (unsigned int)regs->cx, regs->ip, (void *)regs->ip); + if (wrmsr) { + pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", + (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, + regs->ip, (void *)regs->ip); + } else { + pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", + (unsigned int)regs->cx, regs->ip, (void *)regs->ip); + } show_stack_regs(regs); @@ -396,7 +399,14 @@ __visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, while (true) cpu_relax(); +} +__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr, + unsigned long error_code, + unsigned long fault_addr) +{ + ex_handler_msr_mce(regs, false); return true; } @@ -441,17 +451,7 @@ __visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup, unsigned long error_code, unsigned long fault_addr) { - pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", - (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, - regs->ip, (void *)regs->ip); - - show_stack_regs(regs); - - panic("MCA architectural violation!\n"); - - while (true) - cpu_relax(); - + ex_handler_msr_mce(regs, true); return true; } From 3461326e1a9e60d9e260da443972f73fef5d8e38 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:18 +0200 Subject: [PATCH 0896/1056] x86/extable: Rework the exception table mechanics [ Upstream commit 46d28947d9876fc0f8f93d3c69813ef6e9852595 ] The exception table entries contain the instruction address, the fixup address and the handler address. All addresses are relative. Storing the handler address has a few downsides: 1) Most handlers need to be exported 2) Handlers can be defined everywhere and there is no overview about the handler types 3) MCE needs to check the handler type to decide whether an in kernel #MC can be recovered. The functionality of the handler itself is not in any way special, but for these checks there need to be separate functions which in the worst case have to be exported. Some of these 'recoverable' exception fixups are pretty obscure and just reuse some other handler to spare code. That obfuscates e.g. the #MC safe copy functions. Cleaning that up would require more handlers and exports Rework the exception fixup mechanics by storing a fixup type number instead of the handler address and invoke the proper handler for each fixup type. Also teach the extable sort to leave the type field alone. This makes most handlers static except for special cases like the MCE MSR fixup and the BPF fixup. This allows to add more types for cleaning up the obscure places without adding more handler code and exports. There is a marginal code size reduction for a production config and it removes _eight_ exported symbols. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Acked-by: Alexei Starovoitov Link: https://lkml.kernel.org/r/20210908132525.211958725@linutronix.de Signed-off-by: Sasha Levin --- arch/x86/include/asm/asm.h | 22 ++-- arch/x86/include/asm/extable.h | 44 +++++--- arch/x86/include/asm/extable_fixup_types.h | 19 ++++ arch/x86/include/asm/fpu/internal.h | 4 +- arch/x86/include/asm/msr.h | 4 +- arch/x86/include/asm/segment.h | 2 +- arch/x86/kernel/cpu/mce/core.c | 24 +--- arch/x86/kernel/cpu/mce/internal.h | 10 -- arch/x86/kernel/cpu/mce/severity.c | 21 ++-- arch/x86/mm/extable.c | 123 +++++++++------------ arch/x86/net/bpf_jit_comp.c | 11 +- scripts/sorttable.c | 4 +- 12 files changed, 133 insertions(+), 155 deletions(-) create mode 100644 arch/x86/include/asm/extable_fixup_types.h diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 719955e658a215..6aadb9a620eed0 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -122,14 +122,17 @@ #ifdef __KERNEL__ +# include + /* Exception table entry */ #ifdef __ASSEMBLY__ -# define _ASM_EXTABLE_HANDLE(from, to, handler) \ + +# define _ASM_EXTABLE_TYPE(from, to, type) \ .pushsection "__ex_table","a" ; \ .balign 4 ; \ .long (from) - . ; \ .long (to) - . ; \ - .long (handler) - . ; \ + .long type ; \ .popsection # ifdef CONFIG_KPROBES @@ -143,13 +146,13 @@ # endif #else /* ! __ASSEMBLY__ */ -# define _EXPAND_EXTABLE_HANDLE(x) #x -# define _ASM_EXTABLE_HANDLE(from, to, handler) \ + +# define _ASM_EXTABLE_TYPE(from, to, type) \ " .pushsection \"__ex_table\",\"a\"\n" \ " .balign 4\n" \ " .long (" #from ") - .\n" \ " .long (" #to ") - .\n" \ - " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \ + " .long " __stringify(type) " \n" \ " .popsection\n" /* For C file, we already have NOKPROBE_SYMBOL macro */ @@ -165,17 +168,16 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #endif /* __ASSEMBLY__ */ #define _ASM_EXTABLE(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_DEFAULT) #define _ASM_EXTABLE_UA(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_UACCESS) #define _ASM_EXTABLE_CPY(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy) + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_COPY) #define _ASM_EXTABLE_FAULT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_FAULT) #endif /* __KERNEL__ */ - #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h index 1f0cbc52937ca5..93f400eb728f87 100644 --- a/arch/x86/include/asm/extable.h +++ b/arch/x86/include/asm/extable.h @@ -1,12 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_EXTABLE_H #define _ASM_X86_EXTABLE_H + +#include + /* - * The exception table consists of triples of addresses relative to the - * exception table entry itself. The first address is of an instruction - * that is allowed to fault, the second is the target at which the program - * should continue. The third is a handler function to deal with the fault - * caused by the instruction in the first field. + * The exception table consists of two addresses relative to the + * exception table entry itself and a type selector field. + * + * The first address is of an instruction that is allowed to fault, the + * second is the target at which the program should continue. + * + * The type entry is used by fixup_exception() to select the handler to + * deal with the fault caused by the instruction in the first field. * * All the routines below use bits of fixup code that are out of line * with the main instruction path. This means when everything is well, @@ -15,7 +21,7 @@ */ struct exception_table_entry { - int insn, fixup, handler; + int insn, fixup, type; }; struct pt_regs; @@ -25,21 +31,27 @@ struct pt_regs; do { \ (a)->fixup = (b)->fixup + (delta); \ (b)->fixup = (tmp).fixup - (delta); \ - (a)->handler = (b)->handler + (delta); \ - (b)->handler = (tmp).handler - (delta); \ + (a)->type = (b)->type; \ + (b)->type = (tmp).type; \ } while (0) -enum handler_type { - EX_HANDLER_NONE, - EX_HANDLER_FAULT, - EX_HANDLER_UACCESS, - EX_HANDLER_OTHER -}; - extern int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr); extern int fixup_bug(struct pt_regs *regs, int trapnr); -extern enum handler_type ex_get_fault_handler_type(unsigned long ip); +extern int ex_get_fixup_type(unsigned long ip); extern void early_fixup_exception(struct pt_regs *regs, int trapnr); +#ifdef CONFIG_X86_MCE +extern void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr); +#else +static inline void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) { } +#endif + +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_X86_64) +bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs); +#else +static inline bool ex_handler_bpf(const struct exception_table_entry *x, + struct pt_regs *regs) { return false; } +#endif + #endif diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h new file mode 100644 index 00000000000000..0adc117618e6ee --- /dev/null +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EXTABLE_FIXUP_TYPES_H +#define _ASM_X86_EXTABLE_FIXUP_TYPES_H + +#define EX_TYPE_NONE 0 +#define EX_TYPE_DEFAULT 1 +#define EX_TYPE_FAULT 2 +#define EX_TYPE_UACCESS 3 +#define EX_TYPE_COPY 4 +#define EX_TYPE_CLEAR_FS 5 +#define EX_TYPE_FPU_RESTORE 6 +#define EX_TYPE_WRMSR 7 +#define EX_TYPE_RDMSR 8 +#define EX_TYPE_BPF 9 + +#define EX_TYPE_WRMSR_IN_MCE 10 +#define EX_TYPE_RDMSR_IN_MCE 11 + +#endif diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 5a18694a89b24d..ce6fc4f8d1d115 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -126,7 +126,7 @@ extern void save_fpregs_to_fpstate(struct fpu *fpu); #define kernel_insn(insn, output, input...) \ asm volatile("1:" #insn "\n\t" \ "2:\n" \ - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FPU_RESTORE) \ : output : input) static inline int fnsave_to_user_sigframe(struct fregs_state __user *fx) @@ -253,7 +253,7 @@ static inline void fxsave(struct fxregs_state *fx) XRSTORS, X86_FEATURE_XSAVES) \ "\n" \ "3:\n" \ - _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\ + _ASM_EXTABLE_TYPE(661b, 3b, EX_TYPE_FPU_RESTORE) \ : \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index a3f87f1015d3d6..6b52182e178a77 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -92,7 +92,7 @@ static __always_inline unsigned long long __rdmsr(unsigned int msr) asm volatile("1: rdmsr\n" "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR) : EAX_EDX_RET(val, low, high) : "c" (msr)); return EAX_EDX_VAL(val, low, high); @@ -102,7 +102,7 @@ static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high) { asm volatile("1: wrmsr\n" "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR) : : "c" (msr), "a"(low), "d" (high) : "memory"); } diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 72044026eb3c2c..8dd8e8ec9fa552 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -339,7 +339,7 @@ static inline void __loadsegment_fs(unsigned short value) "1: movw %0, %%fs \n" "2: \n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_CLEAR_FS) : : "rm" (value) : "memory"); } diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index d8da3acf1ffdf3..773037e5fd761b 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -382,7 +382,7 @@ static int msr_to_offset(u32 msr) return -1; } -static void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) +void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) { if (wrmsr) { pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", @@ -401,15 +401,6 @@ static void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) cpu_relax(); } -__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) -{ - ex_handler_msr_mce(regs, false); - return true; -} - /* MSR access wrappers used for error injection */ static noinstr u64 mce_rdmsrl(u32 msr) { @@ -439,22 +430,13 @@ static noinstr u64 mce_rdmsrl(u32 msr) */ asm volatile("1: rdmsr\n" "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_fault) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR_IN_MCE) : EAX_EDX_RET(val, low, high) : "c" (msr)); return EAX_EDX_VAL(val, low, high); } -__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) -{ - ex_handler_msr_mce(regs, true); - return true; -} - static noinstr void mce_wrmsrl(u32 msr, u64 v) { u32 low, high; @@ -479,7 +461,7 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v) /* See comment in mce_rdmsrl() */ asm volatile("1: wrmsr\n" "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_fault) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR_IN_MCE) : : "c" (msr), "a"(low), "d" (high) : "memory"); } diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 88dcc79cfb07de..80dc94313bcfc3 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -186,14 +186,4 @@ extern bool amd_filter_mce(struct mce *m); static inline bool amd_filter_mce(struct mce *m) { return false; }; #endif -__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr); - -__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr); - #endif /* __X86_MCE_INTERNAL_H__ */ diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index 17e63144311693..74fe763bffdab2 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -265,25 +265,24 @@ static bool is_copy_from_user(struct pt_regs *regs) */ static int error_context(struct mce *m, struct pt_regs *regs) { - enum handler_type t; - if ((m->cs & 3) == 3) return IN_USER; if (!mc_recoverable(m->mcgstatus)) return IN_KERNEL; - t = ex_get_fault_handler_type(m->ip); - if (t == EX_HANDLER_FAULT) { - m->kflags |= MCE_IN_KERNEL_RECOV; - return IN_KERNEL_RECOV; - } - if (t == EX_HANDLER_UACCESS && regs && is_copy_from_user(regs)) { - m->kflags |= MCE_IN_KERNEL_RECOV; + switch (ex_get_fixup_type(m->ip)) { + case EX_TYPE_UACCESS: + case EX_TYPE_COPY: + if (!regs || !is_copy_from_user(regs)) + return IN_KERNEL; m->kflags |= MCE_IN_KERNEL_COPYIN; + fallthrough; + case EX_TYPE_FAULT: + m->kflags |= MCE_IN_KERNEL_RECOV; return IN_KERNEL_RECOV; + default: + return IN_KERNEL; } - - return IN_KERNEL; } static int mce_severity_amd_smca(struct mce *m, enum context err_ctx) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index d9a1046f3a98ee..5db46df409b50d 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -9,40 +9,25 @@ #include #include -typedef bool (*ex_handler_t)(const struct exception_table_entry *, - struct pt_regs *, int, unsigned long, - unsigned long); - static inline unsigned long ex_fixup_addr(const struct exception_table_entry *x) { return (unsigned long)&x->fixup + x->fixup; } -static inline ex_handler_t -ex_fixup_handler(const struct exception_table_entry *x) -{ - return (ex_handler_t)((unsigned long)&x->handler + x->handler); -} -__visible bool ex_handler_default(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_default(const struct exception_table_entry *fixup, + struct pt_regs *regs) { regs->ip = ex_fixup_addr(fixup); return true; } -EXPORT_SYMBOL(ex_handler_default); -__visible bool ex_handler_fault(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_fault(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { regs->ax = trapnr; - return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); + return ex_handler_default(fixup, regs); } -EXPORT_SYMBOL_GPL(ex_handler_fault); /* * Handler for when we fail to restore a task's FPU state. We should never get @@ -54,10 +39,8 @@ EXPORT_SYMBOL_GPL(ex_handler_fault); * of vulnerability by restoring from the initial state (essentially, zeroing * out all the FPU registers) if we can't restore from the task's FPU state. */ -__visible bool ex_handler_fprestore(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_fprestore(const struct exception_table_entry *fixup, + struct pt_regs *regs) { regs->ip = ex_fixup_addr(fixup); @@ -67,32 +50,23 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup, __restore_fpregs_from_fpstate(&init_fpstate, xfeatures_mask_fpstate()); return true; } -EXPORT_SYMBOL_GPL(ex_handler_fprestore); -__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_uaccess(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?"); - return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); + return ex_handler_default(fixup, regs); } -EXPORT_SYMBOL(ex_handler_uaccess); -__visible bool ex_handler_copy(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_copy(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?"); - return ex_handler_fault(fixup, regs, trapnr, error_code, fault_addr); + return ex_handler_fault(fixup, regs, trapnr); } -EXPORT_SYMBOL(ex_handler_copy); -__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, + struct pt_regs *regs) { if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, regs->ip, (void *)regs->ip)) @@ -101,14 +75,11 @@ __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup /* Pretend that the read succeeded and returned 0. */ regs->ax = 0; regs->dx = 0; - return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); + return ex_handler_default(fixup, regs); } -EXPORT_SYMBOL(ex_handler_rdmsr_unsafe); -__visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, + struct pt_regs *regs) { if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, (unsigned int)regs->dx, @@ -116,44 +87,29 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup show_stack_regs(regs); /* Pretend that the write succeeded. */ - return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); + return ex_handler_default(fixup, regs); } -EXPORT_SYMBOL(ex_handler_wrmsr_unsafe); -__visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) +static bool ex_handler_clear_fs(const struct exception_table_entry *fixup, + struct pt_regs *regs) { if (static_cpu_has(X86_BUG_NULL_SEG)) asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS)); asm volatile ("mov %0, %%fs" : : "rm" (0)); - return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr); + return ex_handler_default(fixup, regs); } -EXPORT_SYMBOL(ex_handler_clear_fs); -enum handler_type ex_get_fault_handler_type(unsigned long ip) +int ex_get_fixup_type(unsigned long ip) { - const struct exception_table_entry *e; - ex_handler_t handler; + const struct exception_table_entry *e = search_exception_tables(ip); - e = search_exception_tables(ip); - if (!e) - return EX_HANDLER_NONE; - handler = ex_fixup_handler(e); - if (handler == ex_handler_fault) - return EX_HANDLER_FAULT; - else if (handler == ex_handler_uaccess || handler == ex_handler_copy) - return EX_HANDLER_UACCESS; - else - return EX_HANDLER_OTHER; + return e ? e->type : EX_TYPE_NONE; } int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { const struct exception_table_entry *e; - ex_handler_t handler; #ifdef CONFIG_PNPBIOS if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { @@ -173,8 +129,33 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, if (!e) return 0; - handler = ex_fixup_handler(e); - return handler(e, regs, trapnr, error_code, fault_addr); + switch (e->type) { + case EX_TYPE_DEFAULT: + return ex_handler_default(e, regs); + case EX_TYPE_FAULT: + return ex_handler_fault(e, regs, trapnr); + case EX_TYPE_UACCESS: + return ex_handler_uaccess(e, regs, trapnr); + case EX_TYPE_COPY: + return ex_handler_copy(e, regs, trapnr); + case EX_TYPE_CLEAR_FS: + return ex_handler_clear_fs(e, regs); + case EX_TYPE_FPU_RESTORE: + return ex_handler_fprestore(e, regs); + case EX_TYPE_RDMSR: + return ex_handler_rdmsr_unsafe(e, regs); + case EX_TYPE_WRMSR: + return ex_handler_wrmsr_unsafe(e, regs); + case EX_TYPE_BPF: + return ex_handler_bpf(e, regs); + case EX_TYPE_RDMSR_IN_MCE: + ex_handler_msr_mce(regs, false); + break; + case EX_TYPE_WRMSR_IN_MCE: + ex_handler_msr_mce(regs, true); + break; + } + BUG(); } extern unsigned int early_recursion_flag; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 131f7ceb54dc60..4f3a6003715034 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -832,9 +832,7 @@ static int emit_atomic(u8 **pprog, u8 atomic_op, return 0; } -static bool ex_handler_bpf(const struct exception_table_entry *x, - struct pt_regs *regs, int trapnr, - unsigned long error_code, unsigned long fault_addr) +bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs) { u32 reg = x->fixup >> 8; @@ -1344,12 +1342,7 @@ st: if (is_imm8(insn->off)) } ex->insn = delta; - delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler; - if (!is_simm32(delta)) { - pr_err("extable->handler doesn't fit into 32-bit\n"); - return -EFAULT; - } - ex->handler = delta; + ex->type = EX_TYPE_BPF; if (dst_reg > BPF_REG_9) { pr_err("verifier error\n"); diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 6ee4fa882919c6..278bb53b325c14 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -240,7 +240,7 @@ static void x86_sort_relative_table(char *extab_image, int image_size) w(r(loc) + i, loc); w(r(loc + 1) + i + 4, loc + 1); - w(r(loc + 2) + i + 8, loc + 2); + /* Don't touch the fixup type */ i += sizeof(uint32_t) * 3; } @@ -253,7 +253,7 @@ static void x86_sort_relative_table(char *extab_image, int image_size) w(r(loc) - i, loc); w(r(loc + 1) - (i + 4), loc + 1); - w(r(loc + 2) - (i + 8), loc + 2); + /* Don't touch the fixup type */ i += sizeof(uint32_t) * 3; } From a86201c3ae79f3652b4a865028fc9b71a2904289 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Sep 2021 15:29:19 +0200 Subject: [PATCH 0897/1056] x86/extable: Provide EX_TYPE_DEFAULT_MCE_SAFE and EX_TYPE_FAULT_MCE_SAFE [ Upstream commit 2cadf5248b9316d3c8af876e795d61c55476f6e9 ] Provide exception fixup types which can be used to identify fixups which allow in kernel #MC recovery and make them invoke the existing handlers. These will be used at places where #MC recovery is handled correctly by the caller. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210908132525.269689153@linutronix.de Signed-off-by: Sasha Levin --- arch/x86/include/asm/extable_fixup_types.h | 3 +++ arch/x86/kernel/cpu/mce/severity.c | 2 ++ arch/x86/mm/extable.c | 2 ++ 3 files changed, 7 insertions(+) diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 0adc117618e6ee..409524d5d2eb1f 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -16,4 +16,7 @@ #define EX_TYPE_WRMSR_IN_MCE 10 #define EX_TYPE_RDMSR_IN_MCE 11 +#define EX_TYPE_DEFAULT_MCE_SAFE 12 +#define EX_TYPE_FAULT_MCE_SAFE 13 + #endif diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index 74fe763bffdab2..d9b77a74f8d2e3 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -278,6 +278,8 @@ static int error_context(struct mce *m, struct pt_regs *regs) m->kflags |= MCE_IN_KERNEL_COPYIN; fallthrough; case EX_TYPE_FAULT: + case EX_TYPE_FAULT_MCE_SAFE: + case EX_TYPE_DEFAULT_MCE_SAFE: m->kflags |= MCE_IN_KERNEL_RECOV; return IN_KERNEL_RECOV; default: diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 5db46df409b50d..f37e290e6d0ab4 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -131,8 +131,10 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, switch (e->type) { case EX_TYPE_DEFAULT: + case EX_TYPE_DEFAULT_MCE_SAFE: return ex_handler_default(e, regs); case EX_TYPE_FAULT: + case EX_TYPE_FAULT_MCE_SAFE: return ex_handler_fault(e, regs, trapnr); case EX_TYPE_UACCESS: return ex_handler_uaccess(e, regs, trapnr); From e89d805e77f20724b97a9a6983c4d7d7c80652f0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:03 +0100 Subject: [PATCH 0898/1056] bitfield.h: Fix "type of reg too small for mask" test [ Upstream commit bff8c3848e071d387d8b0784dc91fa49cd563774 ] The test: 'mask > (typeof(_reg))~0ull' only works correctly when both sides are unsigned, consider: - 0xff000000 vs (int)~0ull - 0x000000ff vs (int)~0ull Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101324.950210584@infradead.org Signed-off-by: Sasha Levin --- include/linux/bitfield.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 4e035aca6f7e60..6093fa6db2600b 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -41,6 +41,22 @@ #define __bf_shf(x) (__builtin_ffsll(x) - 1) +#define __scalar_type_to_unsigned_cases(type) \ + unsigned type: (unsigned type)0, \ + signed type: (unsigned type)0 + +#define __unsigned_scalar_typeof(x) typeof( \ + _Generic((x), \ + char: (unsigned char)0, \ + __scalar_type_to_unsigned_cases(char), \ + __scalar_type_to_unsigned_cases(short), \ + __scalar_type_to_unsigned_cases(int), \ + __scalar_type_to_unsigned_cases(long), \ + __scalar_type_to_unsigned_cases(long long), \ + default: (x))) + +#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x)) + #define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ ({ \ BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ @@ -49,7 +65,8 @@ BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ _pfx "value too large for the field"); \ - BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \ + BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ + __bf_cast_unsigned(_reg, ~0ull), \ _pfx "type of reg too small for mask"); \ __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ (1ULL << __bf_shf(_mask))); \ From 4bbc9a07e050ffb141950e198f227e2caa9962d4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:08 +0100 Subject: [PATCH 0899/1056] x86/entry_32: Remove .fixup usage [ Upstream commit aa93e2ad7464ffb90155a5ffdde963816f86d5dc ] Where possible, push the .fixup into code, at the tail of functions. This is hard for macros since they're used in multiple functions, therefore introduce a new extable handler to pop zeros. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101325.245184699@infradead.org Signed-off-by: Sasha Levin --- arch/x86/entry/entry_32.S | 28 +++++++--------------- arch/x86/include/asm/extable_fixup_types.h | 2 ++ arch/x86/mm/extable.c | 14 +++++++++++ 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 5bd3baf36d874b..2cba70f9753bc0 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -270,17 +270,9 @@ 3: popl %fs addl $(4 + \pop), %esp /* pop the unused "gs" slot */ IRET_FRAME -.pushsection .fixup, "ax" -4: movl $0, (%esp) - jmp 1b -5: movl $0, (%esp) - jmp 2b -6: movl $0, (%esp) - jmp 3b -.popsection - _ASM_EXTABLE(1b, 4b) - _ASM_EXTABLE(2b, 5b) - _ASM_EXTABLE(3b, 6b) + _ASM_EXTABLE_TYPE(1b, 1b, EX_TYPE_POP_ZERO) + _ASM_EXTABLE_TYPE(2b, 2b, EX_TYPE_POP_ZERO) + _ASM_EXTABLE_TYPE(3b, 3b, EX_TYPE_POP_ZERO) .endm .macro RESTORE_ALL_NMI cr3_reg:req pop=0 @@ -923,10 +915,8 @@ SYM_FUNC_START(entry_SYSENTER_32) sti sysexit -.pushsection .fixup, "ax" -2: movl $0, PT_FS(%esp) - jmp 1b -.popsection +2: movl $0, PT_FS(%esp) + jmp 1b _ASM_EXTABLE(1b, 2b) .Lsysenter_fix_flags: @@ -994,8 +984,7 @@ restore_all_switch_stack: */ iret -.section .fixup, "ax" -SYM_CODE_START(asm_iret_error) +.Lasm_iret_error: pushl $0 # no error code pushl $iret_error @@ -1012,9 +1001,8 @@ SYM_CODE_START(asm_iret_error) #endif jmp handle_exception -SYM_CODE_END(asm_iret_error) -.previous - _ASM_EXTABLE(.Lirq_return, asm_iret_error) + + _ASM_EXTABLE(.Lirq_return, .Lasm_iret_error) SYM_FUNC_END(entry_INT80_32) .macro FIXUP_ESPFIX_STACK diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 409524d5d2eb1f..4d709a2768bb88 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -19,4 +19,6 @@ #define EX_TYPE_DEFAULT_MCE_SAFE 12 #define EX_TYPE_FAULT_MCE_SAFE 13 +#define EX_TYPE_POP_ZERO 14 + #endif diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index f37e290e6d0ab4..f59a4d017070a2 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -99,6 +99,18 @@ static bool ex_handler_clear_fs(const struct exception_table_entry *fixup, return ex_handler_default(fixup, regs); } +static bool ex_handler_pop_zero(const struct exception_table_entry *fixup, + struct pt_regs *regs) +{ + /* + * Typically used for when "pop %seg" traps, in which case we'll clear + * the stack slot and re-try the instruction, which will then succeed + * to pop zero. + */ + *((unsigned long *)regs->sp) = 0; + return ex_handler_default(fixup, regs); +} + int ex_get_fixup_type(unsigned long ip) { const struct exception_table_entry *e = search_exception_tables(ip); @@ -156,6 +168,8 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, case EX_TYPE_WRMSR_IN_MCE: ex_handler_msr_mce(regs, true); break; + case EX_TYPE_POP_ZERO: + return ex_handler_pop_zero(e, regs); } BUG(); } From f637fbc7bda60c0dca47925d5e2da948598230b6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:09 +0100 Subject: [PATCH 0900/1056] x86/extable: Extend extable functionality [ Upstream commit 4b5305decc8436bfe363d1c1773e8fa1c828b14d ] In order to remove further .fixup usage, extend the extable infrastructure to take additional information from the extable entry sites. Specifically add _ASM_EXTABLE_TYPE_REG() and EX_TYPE_IMM_REG that extend the existing _ASM_EXTABLE_TYPE() by taking an additional register argument and encoding that and an s16 immediate into the existing s32 type field. This limits the actual types to the first byte, 255 seem plenty. Also add a few flags into the type word, specifically CLEAR_AX and CLEAR_DX which clear the return and extended return register. Notes: - due to the % in our register names it's hard to make it more generally usable as arm64 did. - the s16 is far larger than used in these patches, future extentions can easily shrink this to get more bits. - without the bitfield fix this will not compile, because: 0xFF > -1 and we can't even extract the TYPE field. [nathanchance: Build fix for clang-lto builds: https://lkml.kernel.org/r/20211210234953.3420108-1-nathan@kernel.org ] Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Link: https://lore.kernel.org/r/20211110101325.303890153@infradead.org Signed-off-by: Sasha Levin --- arch/x86/include/asm/asm.h | 37 ++++++++++++ arch/x86/include/asm/extable.h | 6 +- arch/x86/include/asm/extable_fixup_types.h | 24 ++++++++ arch/x86/include/asm/insn-eval.h | 2 + arch/x86/lib/insn-eval.c | 66 ++++++++++++++-------- arch/x86/mm/extable.c | 40 +++++++++++-- arch/x86/net/bpf_jit_comp.c | 2 +- 7 files changed, 145 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 6aadb9a620eed0..8cbdfd77a3909c 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -147,6 +147,33 @@ #else /* ! __ASSEMBLY__ */ +# define DEFINE_EXTABLE_TYPE_REG \ + ".macro extable_type_reg type:req reg:req\n" \ + ".set found, 0\n" \ + ".set regnr, 0\n" \ + ".irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15\n" \ + ".ifc \\reg, %%\\rs\n" \ + ".set found, found+1\n" \ + ".long \\type + (regnr << 8)\n" \ + ".endif\n" \ + ".set regnr, regnr+1\n" \ + ".endr\n" \ + ".set regnr, 0\n" \ + ".irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d,r13d,r14d,r15d\n" \ + ".ifc \\reg, %%\\rs\n" \ + ".set found, found+1\n" \ + ".long \\type + (regnr << 8)\n" \ + ".endif\n" \ + ".set regnr, regnr+1\n" \ + ".endr\n" \ + ".if (found != 1)\n" \ + ".error \"extable_type_reg: bad register argument\"\n" \ + ".endif\n" \ + ".endm\n" + +# define UNDEFINE_EXTABLE_TYPE_REG \ + ".purgem extable_type_reg\n" + # define _ASM_EXTABLE_TYPE(from, to, type) \ " .pushsection \"__ex_table\",\"a\"\n" \ " .balign 4\n" \ @@ -155,6 +182,16 @@ " .long " __stringify(type) " \n" \ " .popsection\n" +# define _ASM_EXTABLE_TYPE_REG(from, to, type, reg) \ + " .pushsection \"__ex_table\",\"a\"\n" \ + " .balign 4\n" \ + " .long (" #from ") - .\n" \ + " .long (" #to ") - .\n" \ + DEFINE_EXTABLE_TYPE_REG \ + "extable_type_reg reg=" __stringify(reg) ", type=" __stringify(type) " \n"\ + UNDEFINE_EXTABLE_TYPE_REG \ + " .popsection\n" + /* For C file, we already have NOKPROBE_SYMBOL macro */ /* diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h index 93f400eb728f87..155c991ba95e25 100644 --- a/arch/x86/include/asm/extable.h +++ b/arch/x86/include/asm/extable.h @@ -21,7 +21,7 @@ */ struct exception_table_entry { - int insn, fixup, type; + int insn, fixup, data; }; struct pt_regs; @@ -31,8 +31,8 @@ struct pt_regs; do { \ (a)->fixup = (b)->fixup + (delta); \ (b)->fixup = (tmp).fixup - (delta); \ - (a)->type = (b)->type; \ - (b)->type = (tmp).type; \ + (a)->data = (b)->data; \ + (b)->data = (tmp).data; \ } while (0) extern int fixup_exception(struct pt_regs *regs, int trapnr, diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 4d709a2768bb88..944f8329022a79 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -2,6 +2,29 @@ #ifndef _ASM_X86_EXTABLE_FIXUP_TYPES_H #define _ASM_X86_EXTABLE_FIXUP_TYPES_H +/* + * Our IMM is signed, as such it must live at the top end of the word. Also, + * since C99 hex constants are of ambigious type, force cast the mask to 'int' + * so that FIELD_GET() will DTRT and sign extend the value when it extracts it. + */ +#define EX_DATA_TYPE_MASK ((int)0x000000FF) +#define EX_DATA_REG_MASK ((int)0x00000F00) +#define EX_DATA_FLAG_MASK ((int)0x0000F000) +#define EX_DATA_IMM_MASK ((int)0xFFFF0000) + +#define EX_DATA_REG_SHIFT 8 +#define EX_DATA_FLAG_SHIFT 12 +#define EX_DATA_IMM_SHIFT 16 + +#define EX_DATA_FLAG(flag) ((flag) << EX_DATA_FLAG_SHIFT) +#define EX_DATA_IMM(imm) ((imm) << EX_DATA_IMM_SHIFT) + +/* flags */ +#define EX_FLAG_CLEAR_AX EX_DATA_FLAG(1) +#define EX_FLAG_CLEAR_DX EX_DATA_FLAG(2) +#define EX_FLAG_CLEAR_AX_DX EX_DATA_FLAG(3) + +/* types */ #define EX_TYPE_NONE 0 #define EX_TYPE_DEFAULT 1 #define EX_TYPE_FAULT 2 @@ -20,5 +43,6 @@ #define EX_TYPE_FAULT_MCE_SAFE 13 #define EX_TYPE_POP_ZERO 14 +#define EX_TYPE_IMM_REG 15 /* reg := (long)imm */ #endif diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index 4ec3613551e3b5..3df123f437c968 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -15,6 +15,8 @@ #define INSN_CODE_SEG_OPND_SZ(params) (params & 0xf) #define INSN_CODE_SEG_PARAMS(oper_sz, addr_sz) (oper_sz | (addr_sz << 4)) +int pt_regs_offset(struct pt_regs *regs, int regno); + bool insn_has_rep_prefix(struct insn *insn); void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs); int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs); diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index eb3ccffb9b9dcc..7760d228041bac 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -412,32 +412,39 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx) #endif /* CONFIG_X86_64 */ } -static int get_reg_offset(struct insn *insn, struct pt_regs *regs, - enum reg_type type) +static const int pt_regoff[] = { + offsetof(struct pt_regs, ax), + offsetof(struct pt_regs, cx), + offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, bx), + offsetof(struct pt_regs, sp), + offsetof(struct pt_regs, bp), + offsetof(struct pt_regs, si), + offsetof(struct pt_regs, di), +#ifdef CONFIG_X86_64 + offsetof(struct pt_regs, r8), + offsetof(struct pt_regs, r9), + offsetof(struct pt_regs, r10), + offsetof(struct pt_regs, r11), + offsetof(struct pt_regs, r12), + offsetof(struct pt_regs, r13), + offsetof(struct pt_regs, r14), + offsetof(struct pt_regs, r15), +#endif +}; + +int pt_regs_offset(struct pt_regs *regs, int regno) { + if ((unsigned)regno < ARRAY_SIZE(pt_regoff)) + return pt_regoff[regno]; + return -EDOM; +} + +static int get_regno(struct insn *insn, enum reg_type type) +{ + int nr_registers = ARRAY_SIZE(pt_regoff); int regno = 0; - static const int regoff[] = { - offsetof(struct pt_regs, ax), - offsetof(struct pt_regs, cx), - offsetof(struct pt_regs, dx), - offsetof(struct pt_regs, bx), - offsetof(struct pt_regs, sp), - offsetof(struct pt_regs, bp), - offsetof(struct pt_regs, si), - offsetof(struct pt_regs, di), -#ifdef CONFIG_X86_64 - offsetof(struct pt_regs, r8), - offsetof(struct pt_regs, r9), - offsetof(struct pt_regs, r10), - offsetof(struct pt_regs, r11), - offsetof(struct pt_regs, r12), - offsetof(struct pt_regs, r13), - offsetof(struct pt_regs, r14), - offsetof(struct pt_regs, r15), -#endif - }; - int nr_registers = ARRAY_SIZE(regoff); /* * Don't possibly decode a 32-bit instructions as * reading a 64-bit-only register. @@ -505,7 +512,18 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs, WARN_ONCE(1, "decoded an instruction with an invalid register"); return -EINVAL; } - return regoff[regno]; + return regno; +} + +static int get_reg_offset(struct insn *insn, struct pt_regs *regs, + enum reg_type type) +{ + int regno = get_regno(insn, type); + + if (regno < 0) + return regno; + + return pt_regs_offset(regs, regno); } /** diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index f59a4d017070a2..251732113624b9 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -2,12 +2,25 @@ #include #include #include +#include #include #include #include #include #include +#include + +static inline unsigned long *pt_regs_nr(struct pt_regs *regs, int nr) +{ + int reg_offset = pt_regs_offset(regs, nr); + static unsigned long __dummy; + + if (WARN_ON_ONCE(reg_offset < 0)) + return &__dummy; + + return (unsigned long *)((unsigned long)regs + reg_offset); +} static inline unsigned long ex_fixup_addr(const struct exception_table_entry *x) @@ -15,10 +28,15 @@ ex_fixup_addr(const struct exception_table_entry *x) return (unsigned long)&x->fixup + x->fixup; } -static bool ex_handler_default(const struct exception_table_entry *fixup, +static bool ex_handler_default(const struct exception_table_entry *e, struct pt_regs *regs) { - regs->ip = ex_fixup_addr(fixup); + if (e->data & EX_FLAG_CLEAR_AX) + regs->ax = 0; + if (e->data & EX_FLAG_CLEAR_DX) + regs->dx = 0; + + regs->ip = ex_fixup_addr(e); return true; } @@ -111,17 +129,25 @@ static bool ex_handler_pop_zero(const struct exception_table_entry *fixup, return ex_handler_default(fixup, regs); } +static bool ex_handler_imm_reg(const struct exception_table_entry *fixup, + struct pt_regs *regs, int reg, int imm) +{ + *pt_regs_nr(regs, reg) = (long)imm; + return ex_handler_default(fixup, regs); +} + int ex_get_fixup_type(unsigned long ip) { const struct exception_table_entry *e = search_exception_tables(ip); - return e ? e->type : EX_TYPE_NONE; + return e ? FIELD_GET(EX_DATA_TYPE_MASK, e->data) : EX_TYPE_NONE; } int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { const struct exception_table_entry *e; + int type, reg, imm; #ifdef CONFIG_PNPBIOS if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { @@ -141,7 +167,11 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, if (!e) return 0; - switch (e->type) { + type = FIELD_GET(EX_DATA_TYPE_MASK, e->data); + reg = FIELD_GET(EX_DATA_REG_MASK, e->data); + imm = FIELD_GET(EX_DATA_IMM_MASK, e->data); + + switch (type) { case EX_TYPE_DEFAULT: case EX_TYPE_DEFAULT_MCE_SAFE: return ex_handler_default(e, regs); @@ -170,6 +200,8 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, break; case EX_TYPE_POP_ZERO: return ex_handler_pop_zero(e, regs); + case EX_TYPE_IMM_REG: + return ex_handler_imm_reg(e, regs, reg, imm); } BUG(); } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4f3a6003715034..dccaab2113f937 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1342,7 +1342,7 @@ st: if (is_imm8(insn->off)) } ex->insn = delta; - ex->type = EX_TYPE_BPF; + ex->data = EX_TYPE_BPF; if (dst_reg > BPF_REG_9) { pr_err("verifier error\n"); From 6875d2425be8975cfe9173de1a13be0bf17b3932 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:10 +0100 Subject: [PATCH 0901/1056] x86/msr: Remove .fixup usage [ Upstream commit d52a7344bdfa9c3442d3f86fb3501d9343726c76 ] Rework the MSR accessors to remove .fixup usage. Add two new extable types (to the 4 already existing msr ones) using the new register infrastructure to record which register should get the error value. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101325.364084212@infradead.org Signed-off-by: Sasha Levin --- arch/x86/include/asm/extable_fixup_types.h | 23 +++++----- arch/x86/include/asm/msr.h | 26 ++++------- arch/x86/mm/extable.c | 51 ++++++++++++---------- 3 files changed, 47 insertions(+), 53 deletions(-) diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 944f8329022a79..9d597fe1017d7c 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -32,17 +32,16 @@ #define EX_TYPE_COPY 4 #define EX_TYPE_CLEAR_FS 5 #define EX_TYPE_FPU_RESTORE 6 -#define EX_TYPE_WRMSR 7 -#define EX_TYPE_RDMSR 8 -#define EX_TYPE_BPF 9 - -#define EX_TYPE_WRMSR_IN_MCE 10 -#define EX_TYPE_RDMSR_IN_MCE 11 - -#define EX_TYPE_DEFAULT_MCE_SAFE 12 -#define EX_TYPE_FAULT_MCE_SAFE 13 - -#define EX_TYPE_POP_ZERO 14 -#define EX_TYPE_IMM_REG 15 /* reg := (long)imm */ +#define EX_TYPE_BPF 7 +#define EX_TYPE_WRMSR 8 +#define EX_TYPE_RDMSR 9 +#define EX_TYPE_WRMSR_SAFE 10 /* reg := -EIO */ +#define EX_TYPE_RDMSR_SAFE 11 /* reg := -EIO */ +#define EX_TYPE_WRMSR_IN_MCE 12 +#define EX_TYPE_RDMSR_IN_MCE 13 +#define EX_TYPE_DEFAULT_MCE_SAFE 14 +#define EX_TYPE_FAULT_MCE_SAFE 15 +#define EX_TYPE_POP_ZERO 16 +#define EX_TYPE_IMM_REG 17 /* reg := (long)imm */ #endif diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 6b52182e178a77..d42e6c6b47b1e4 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -137,17 +137,11 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, { DECLARE_ARGS(val, low, high); - asm volatile("2: rdmsr ; xor %[err],%[err]\n" - "1:\n\t" - ".section .fixup,\"ax\"\n\t" - "3: mov %[fault],%[err]\n\t" - "xorl %%eax, %%eax\n\t" - "xorl %%edx, %%edx\n\t" - "jmp 1b\n\t" - ".previous\n\t" - _ASM_EXTABLE(2b, 3b) + asm volatile("1: rdmsr ; xor %[err],%[err]\n" + "2:\n\t" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %[err]) : [err] "=r" (*err), EAX_EDX_RET(val, low, high) - : "c" (msr), [fault] "i" (-EIO)); + : "c" (msr)); if (tracepoint_enabled(read_msr)) do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err); return EAX_EDX_VAL(val, low, high); @@ -169,15 +163,11 @@ native_write_msr_safe(unsigned int msr, u32 low, u32 high) { int err; - asm volatile("2: wrmsr ; xor %[err],%[err]\n" - "1:\n\t" - ".section .fixup,\"ax\"\n\t" - "3: mov %[fault],%[err] ; jmp 1b\n\t" - ".previous\n\t" - _ASM_EXTABLE(2b, 3b) + asm volatile("1: wrmsr ; xor %[err],%[err]\n" + "2:\n\t" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err]) : [err] "=a" (err) - : "c" (msr), "0" (low), "d" (high), - [fault] "i" (-EIO) + : "c" (msr), "0" (low), "d" (high) : "memory"); if (tracepoint_enabled(write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), err); diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 251732113624b9..1c558f8e8c079b 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -83,28 +83,29 @@ static bool ex_handler_copy(const struct exception_table_entry *fixup, return ex_handler_fault(fixup, regs, trapnr); } -static bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, - struct pt_regs *regs) +static bool ex_handler_msr(const struct exception_table_entry *fixup, + struct pt_regs *regs, bool wrmsr, bool safe, int reg) { - if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", + if (!safe && wrmsr && + pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", + (unsigned int)regs->cx, (unsigned int)regs->dx, + (unsigned int)regs->ax, regs->ip, (void *)regs->ip)) + show_stack_regs(regs); + + if (!safe && !wrmsr && + pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, regs->ip, (void *)regs->ip)) show_stack_regs(regs); - /* Pretend that the read succeeded and returned 0. */ - regs->ax = 0; - regs->dx = 0; - return ex_handler_default(fixup, regs); -} + if (!wrmsr) { + /* Pretend that the read succeeded and returned 0. */ + regs->ax = 0; + regs->dx = 0; + } -static bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, - struct pt_regs *regs) -{ - if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", - (unsigned int)regs->cx, (unsigned int)regs->dx, - (unsigned int)regs->ax, regs->ip, (void *)regs->ip)) - show_stack_regs(regs); + if (safe) + *pt_regs_nr(regs, reg) = -EIO; - /* Pretend that the write succeeded. */ return ex_handler_default(fixup, regs); } @@ -186,18 +187,22 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, return ex_handler_clear_fs(e, regs); case EX_TYPE_FPU_RESTORE: return ex_handler_fprestore(e, regs); - case EX_TYPE_RDMSR: - return ex_handler_rdmsr_unsafe(e, regs); - case EX_TYPE_WRMSR: - return ex_handler_wrmsr_unsafe(e, regs); case EX_TYPE_BPF: return ex_handler_bpf(e, regs); - case EX_TYPE_RDMSR_IN_MCE: - ex_handler_msr_mce(regs, false); - break; + case EX_TYPE_WRMSR: + return ex_handler_msr(e, regs, true, false, reg); + case EX_TYPE_RDMSR: + return ex_handler_msr(e, regs, false, false, reg); + case EX_TYPE_WRMSR_SAFE: + return ex_handler_msr(e, regs, true, true, reg); + case EX_TYPE_RDMSR_SAFE: + return ex_handler_msr(e, regs, false, true, reg); case EX_TYPE_WRMSR_IN_MCE: ex_handler_msr_mce(regs, true); break; + case EX_TYPE_RDMSR_IN_MCE: + ex_handler_msr_mce(regs, false); + break; case EX_TYPE_POP_ZERO: return ex_handler_pop_zero(e, regs); case EX_TYPE_IMM_REG: From 88eded8104d2ca0429703755dd250f8cbecc1447 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:11 +0100 Subject: [PATCH 0902/1056] x86/futex: Remove .fixup usage [ Upstream commit 4c132d1d844a53fc4e4b5c34e36ef10d6124b783 ] Use the new EX_TYPE_IMM_REG to store -EFAULT into the designated 'ret' register, this removes the need for anonymous .fixup code. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101325.426016322@infradead.org Signed-off-by: Sasha Levin --- arch/x86/include/asm/extable_fixup_types.h | 2 ++ arch/x86/include/asm/futex.h | 28 +++++++--------------- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 9d597fe1017d7c..7469038de100dc 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -42,6 +42,8 @@ #define EX_TYPE_DEFAULT_MCE_SAFE 14 #define EX_TYPE_FAULT_MCE_SAFE 15 #define EX_TYPE_POP_ZERO 16 + #define EX_TYPE_IMM_REG 17 /* reg := (long)imm */ +#define EX_TYPE_EFAULT_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(-EFAULT)) #endif diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index f9c00110a69add..99d345b686fa22 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -17,13 +17,9 @@ do { \ int oldval = 0, ret; \ asm volatile("1:\t" insn "\n" \ "2:\n" \ - "\t.section .fixup,\"ax\"\n" \ - "3:\tmov\t%3, %1\n" \ - "\tjmp\t2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %1) \ : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ - : "i" (-EFAULT), "0" (oparg), "1" (0)); \ + : "0" (oparg), "1" (0)); \ if (ret) \ goto label; \ *oval = oldval; \ @@ -39,15 +35,11 @@ do { \ "3:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ "\tjnz\t2b\n" \ "4:\n" \ - "\t.section .fixup,\"ax\"\n" \ - "5:\tmov\t%5, %1\n" \ - "\tjmp\t4b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 5b) \ - _ASM_EXTABLE_UA(3b, 5b) \ + _ASM_EXTABLE_TYPE_REG(1b, 4b, EX_TYPE_EFAULT_REG, %1) \ + _ASM_EXTABLE_TYPE_REG(3b, 4b, EX_TYPE_EFAULT_REG, %1) \ : "=&a" (oldval), "=&r" (ret), \ "+m" (*uaddr), "=&r" (tem) \ - : "r" (oparg), "i" (-EFAULT), "1" (0)); \ + : "r" (oparg), "1" (0)); \ if (ret) \ goto label; \ *oval = oldval; \ @@ -95,15 +87,11 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!user_access_begin(uaddr, sizeof(u32))) return -EFAULT; asm volatile("\n" - "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" + "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" "2:\n" - "\t.section .fixup, \"ax\"\n" - "3:\tmov %3, %0\n" - "\tjmp 2b\n" - "\t.previous\n" - _ASM_EXTABLE_UA(1b, 3b) + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %0) \ : "+r" (ret), "=a" (oldval), "+m" (*uaddr) - : "i" (-EFAULT), "r" (newval), "1" (oldval) + : "r" (newval), "1" (oldval) : "memory" ); user_access_end(); From d97c0667c1e61ded6639117b4b9584a9c12b7e66 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 2 Feb 2022 00:49:44 +0000 Subject: [PATCH 0903/1056] KVM: x86: Use __try_cmpxchg_user() to emulate atomic accesses [ Upstream commit 1c2361f667f3648855ceae25f1332c18413fdb9f ] Use the recently introduce __try_cmpxchg_user() to emulate atomic guest accesses via the associated userspace address instead of mapping the backing pfn into kernel address space. Using kvm_vcpu_map() is unsafe as it does not coordinate with KVM's mmu_notifier to ensure the hva=>pfn translation isn't changed/unmapped in the memremap() path, i.e. when there's no struct page and thus no elevated refcount. Fixes: 42e35f8072c3 ("KVM/X86: Use kvm_vcpu_map in emulator_cmpxchg_emulated") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20220202004945.2540433-5-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4525d0b25a432d..f9802ceed60ab8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6894,15 +6894,8 @@ static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, exception, &write_emultor); } -#define CMPXCHG_TYPE(t, ptr, old, new) \ - (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) - -#ifdef CONFIG_X86_64 -# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) -#else -# define CMPXCHG64(ptr, old, new) \ - (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) -#endif +#define emulator_try_cmpxchg_user(t, ptr, old, new) \ + (__try_cmpxchg_user((t __user *)(ptr), (t *)(old), *(t *)(new), efault ## t)) static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, unsigned long addr, @@ -6911,12 +6904,11 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, unsigned int bytes, struct x86_exception *exception) { - struct kvm_host_map map; struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); u64 page_line_mask; + unsigned long hva; gpa_t gpa; - char *kaddr; - bool exchanged; + int r; /* guests cmpxchg8b have to be emulated atomically */ if (bytes > 8 || (bytes & (bytes - 1))) @@ -6940,31 +6932,32 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask)) goto emul_write; - if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map)) + hva = kvm_vcpu_gfn_to_hva(vcpu, gpa_to_gfn(gpa)); + if (kvm_is_error_hva(addr)) goto emul_write; - kaddr = map.hva + offset_in_page(gpa); + hva += offset_in_page(gpa); switch (bytes) { case 1: - exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); + r = emulator_try_cmpxchg_user(u8, hva, old, new); break; case 2: - exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); + r = emulator_try_cmpxchg_user(u16, hva, old, new); break; case 4: - exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); + r = emulator_try_cmpxchg_user(u32, hva, old, new); break; case 8: - exchanged = CMPXCHG64(kaddr, old, new); + r = emulator_try_cmpxchg_user(u64, hva, old, new); break; default: BUG(); } - kvm_vcpu_unmap(vcpu, &map, true); - - if (!exchanged) + if (r < 0) + goto emul_write; + if (r) return X86EMUL_CMPXCHG_FAILED; kvm_page_track_write(vcpu, gpa, new, bytes); From d7afb4a13f6c6ee7df7d0bfc67b4ef19ece6d802 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 16 Feb 2022 11:51:45 +0200 Subject: [PATCH 0904/1056] xhci: dbc: refactor xhci_dbc_init() [ Upstream commit 534675942e901959b5d8dc11ea526c4e48817d8e ] Refactor xhci_dbc_init(), splitting it into logical parts closer to the Linux device model. - Create the fake dbc device, depends on xhci strucure - Allocate a dbc structure, xhci agnostic - Call xhci_dbc_tty_probe(), similar to actual probe. Adjustments to xhci_dbc_exit and xhci_dbc_remove are also needed as a result to the xhci_dbc_init() changes Mostly non-functional changes, except for creating the dbc sysfs entry earlier, together with the dbc structure. Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220216095153.1303105-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-dbgcap.c | 124 ++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 58 deletions(-) diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index ccb0156fcebebf..6a437862b4980a 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -924,49 +924,6 @@ static void xhci_do_dbc_exit(struct xhci_hcd *xhci) spin_unlock_irqrestore(&xhci->lock, flags); } -static int xhci_do_dbc_init(struct xhci_hcd *xhci) -{ - u32 reg; - struct xhci_dbc *dbc; - unsigned long flags; - void __iomem *base; - int dbc_cap_offs; - - base = &xhci->cap_regs->hc_capbase; - dbc_cap_offs = xhci_find_next_ext_cap(base, 0, XHCI_EXT_CAPS_DEBUG); - if (!dbc_cap_offs) - return -ENODEV; - - dbc = kzalloc(sizeof(*dbc), GFP_KERNEL); - if (!dbc) - return -ENOMEM; - - dbc->regs = base + dbc_cap_offs; - - /* We will avoid using DbC in xhci driver if it's in use. */ - reg = readl(&dbc->regs->control); - if (reg & DBC_CTRL_DBC_ENABLE) { - kfree(dbc); - return -EBUSY; - } - - spin_lock_irqsave(&xhci->lock, flags); - if (xhci->dbc) { - spin_unlock_irqrestore(&xhci->lock, flags); - kfree(dbc); - return -EBUSY; - } - xhci->dbc = dbc; - spin_unlock_irqrestore(&xhci->lock, flags); - - dbc->xhci = xhci; - dbc->dev = xhci_to_hcd(xhci)->self.sysdev; - INIT_DELAYED_WORK(&dbc->event_work, xhci_dbc_handle_events); - spin_lock_init(&dbc->lock); - - return 0; -} - static ssize_t dbc_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1026,44 +983,95 @@ static ssize_t dbc_store(struct device *dev, static DEVICE_ATTR_RW(dbc); -int xhci_dbc_init(struct xhci_hcd *xhci) +struct xhci_dbc * +xhci_alloc_dbc(struct device *dev, void __iomem *base) { + struct xhci_dbc *dbc; int ret; - struct device *dev = xhci_to_hcd(xhci)->self.controller; - ret = xhci_do_dbc_init(xhci); + dbc = kzalloc(sizeof(*dbc), GFP_KERNEL); + if (!dbc) + return NULL; + + dbc->regs = base; + dbc->dev = dev; + + if (readl(&dbc->regs->control) & DBC_CTRL_DBC_ENABLE) + return NULL; + + INIT_DELAYED_WORK(&dbc->event_work, xhci_dbc_handle_events); + spin_lock_init(&dbc->lock); + + ret = device_create_file(dev, &dev_attr_dbc); if (ret) - goto init_err3; + goto err; + + return dbc; +err: + kfree(dbc); + return NULL; +} + +/* undo what xhci_alloc_dbc() did */ +void xhci_dbc_remove(struct xhci_dbc *dbc) +{ + if (!dbc) + return; + /* stop hw, stop wq and call dbc->ops->stop() */ + xhci_dbc_stop(dbc); + + /* remove sysfs files */ + device_remove_file(dbc->dev, &dev_attr_dbc); + + kfree(dbc); +} + +int xhci_dbc_init(struct xhci_hcd *xhci) +{ + struct device *dev; + void __iomem *base; + int ret; + int dbc_cap_offs; + + /* create all parameters needed resembling a dbc device */ + dev = xhci_to_hcd(xhci)->self.controller; + base = &xhci->cap_regs->hc_capbase; + + dbc_cap_offs = xhci_find_next_ext_cap(base, 0, XHCI_EXT_CAPS_DEBUG); + if (!dbc_cap_offs) + return -ENODEV; + + /* already allocated and in use */ + if (xhci->dbc) + return -EBUSY; + + xhci->dbc = xhci_alloc_dbc(dev, base); + if (!xhci->dbc) + return -ENOMEM; ret = xhci_dbc_tty_probe(xhci); if (ret) goto init_err2; - ret = device_create_file(dev, &dev_attr_dbc); - if (ret) - goto init_err1; - return 0; -init_err1: - xhci_dbc_tty_remove(xhci->dbc); init_err2: xhci_do_dbc_exit(xhci); -init_err3: return ret; } void xhci_dbc_exit(struct xhci_hcd *xhci) { - struct device *dev = xhci_to_hcd(xhci)->self.controller; + unsigned long flags; if (!xhci->dbc) return; - device_remove_file(dev, &dev_attr_dbc); xhci_dbc_tty_remove(xhci->dbc); - xhci_dbc_stop(xhci->dbc); - xhci_do_dbc_exit(xhci); + xhci_dbc_remove(xhci->dbc); + spin_lock_irqsave(&xhci->lock, flags); + xhci->dbc = NULL; + spin_unlock_irqrestore(&xhci->lock, flags); } #ifdef CONFIG_PM From 413c5f751f7ceb2f2f2787561465a17932ca51cb Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 16 Feb 2022 11:51:46 +0200 Subject: [PATCH 0905/1056] xhci: dbc: create and remove dbc structure in dbgtty driver. [ Upstream commit 5ce036b98dd3301fc43bb06a6383ef07b6c776bc ] Turn the dbgtty closer to a device driver by allocating the dbc structure in its own xhci_dbc_tty_probe() function, and freeing it in xhci_dbc_tty_remove() Remove xhci_do_dbc_exit() as its no longer needed. allocate and create the dbc strcuture in xhci_dbc_tty_probe() Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220216095153.1303105-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-dbgcap.c | 26 +++----------------------- drivers/usb/host/xhci-dbgcap.h | 5 ++++- drivers/usb/host/xhci-dbgtty.c | 22 +++++++++++++++------- 3 files changed, 22 insertions(+), 31 deletions(-) diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index 6a437862b4980a..f4da5708a40f8a 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -914,16 +914,6 @@ static void xhci_dbc_handle_events(struct work_struct *work) mod_delayed_work(system_wq, &dbc->event_work, 1); } -static void xhci_do_dbc_exit(struct xhci_hcd *xhci) -{ - unsigned long flags; - - spin_lock_irqsave(&xhci->lock, flags); - kfree(xhci->dbc); - xhci->dbc = NULL; - spin_unlock_irqrestore(&xhci->lock, flags); -} - static ssize_t dbc_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -984,7 +974,7 @@ static ssize_t dbc_store(struct device *dev, static DEVICE_ATTR_RW(dbc); struct xhci_dbc * -xhci_alloc_dbc(struct device *dev, void __iomem *base) +xhci_alloc_dbc(struct device *dev, void __iomem *base, const struct dbc_driver *driver) { struct xhci_dbc *dbc; int ret; @@ -995,6 +985,7 @@ xhci_alloc_dbc(struct device *dev, void __iomem *base) dbc->regs = base; dbc->dev = dev; + dbc->driver = driver; if (readl(&dbc->regs->control) & DBC_CTRL_DBC_ENABLE) return NULL; @@ -1045,18 +1036,8 @@ int xhci_dbc_init(struct xhci_hcd *xhci) if (xhci->dbc) return -EBUSY; - xhci->dbc = xhci_alloc_dbc(dev, base); - if (!xhci->dbc) - return -ENOMEM; - - ret = xhci_dbc_tty_probe(xhci); - if (ret) - goto init_err2; - - return 0; + ret = xhci_dbc_tty_probe(dev, base + dbc_cap_offs, xhci); -init_err2: - xhci_do_dbc_exit(xhci); return ret; } @@ -1068,7 +1049,6 @@ void xhci_dbc_exit(struct xhci_hcd *xhci) return; xhci_dbc_tty_remove(xhci->dbc); - xhci_dbc_remove(xhci->dbc); spin_lock_irqsave(&xhci->lock, flags); xhci->dbc = NULL; spin_unlock_irqrestore(&xhci->lock, flags); diff --git a/drivers/usb/host/xhci-dbgcap.h b/drivers/usb/host/xhci-dbgcap.h index c70b78d504eb54..5d8c7815491cbf 100644 --- a/drivers/usb/host/xhci-dbgcap.h +++ b/drivers/usb/host/xhci-dbgcap.h @@ -196,8 +196,11 @@ static inline struct dbc_ep *get_out_ep(struct xhci_dbc *dbc) #ifdef CONFIG_USB_XHCI_DBGCAP int xhci_dbc_init(struct xhci_hcd *xhci); void xhci_dbc_exit(struct xhci_hcd *xhci); -int xhci_dbc_tty_probe(struct xhci_hcd *xhci); +int xhci_dbc_tty_probe(struct device *dev, void __iomem *res, struct xhci_hcd *xhci); void xhci_dbc_tty_remove(struct xhci_dbc *dbc); +struct xhci_dbc *xhci_alloc_dbc(struct device *dev, void __iomem *res, + const struct dbc_driver *driver); +void xhci_dbc_remove(struct xhci_dbc *dbc); struct dbc_request *dbc_alloc_request(struct xhci_dbc *dbc, unsigned int direction, gfp_t flags); diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c index eb46e642e87aa5..18bcc96853ae10 100644 --- a/drivers/usb/host/xhci-dbgtty.c +++ b/drivers/usb/host/xhci-dbgtty.c @@ -468,9 +468,9 @@ static const struct dbc_driver dbc_driver = { .disconnect = xhci_dbc_tty_unregister_device, }; -int xhci_dbc_tty_probe(struct xhci_hcd *xhci) +int xhci_dbc_tty_probe(struct device *dev, void __iomem *base, struct xhci_hcd *xhci) { - struct xhci_dbc *dbc = xhci->dbc; + struct xhci_dbc *dbc; struct dbc_port *port; int status; @@ -485,13 +485,22 @@ int xhci_dbc_tty_probe(struct xhci_hcd *xhci) goto out; } - dbc->driver = &dbc_driver; - dbc->priv = port; + dbc_tty_driver->driver_state = port; + + dbc = xhci_alloc_dbc(dev, base, &dbc_driver); + if (!dbc) { + status = -ENOMEM; + goto out2; + } + dbc->priv = port; - dbc_tty_driver->driver_state = port; + /* get rid of xhci once this is a real driver binding to a device */ + xhci->dbc = dbc; return 0; +out2: + kfree(port); out: /* dbc_tty_exit will be called by module_exit() in the future */ dbc_tty_exit(); @@ -506,8 +515,7 @@ void xhci_dbc_tty_remove(struct xhci_dbc *dbc) { struct dbc_port *port = dbc_to_port(dbc); - dbc->driver = NULL; - dbc->priv = NULL; + xhci_dbc_remove(dbc); kfree(port); /* dbc_tty_exit will be called by module_exit() in the future */ From 813f4b49e81ccedcc86efeb05f724b4938ad3b55 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 16 Feb 2022 11:51:47 +0200 Subject: [PATCH 0906/1056] xhci: dbc: Rename xhci_dbc_init and xhci_dbc_exit [ Upstream commit 5c44d9d7570b244ca08fef817c4c90aa7a1f1b5f ] These names give the impression the functions are related to module init calls, but are in fact creating and removing the dbc fake device Rename them to xhci_create_dbc_dev() and xhci_remove_dbc_dev(). We will need the _init and _exit names for actual dbc module init and exit calls. No functional changes Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220216095153.1303105-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-dbgcap.c | 5 +++-- drivers/usb/host/xhci-dbgcap.h | 8 ++++---- drivers/usb/host/xhci.c | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index f4da5708a40f8a..46c8f3c187f7ea 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -1017,7 +1017,8 @@ void xhci_dbc_remove(struct xhci_dbc *dbc) kfree(dbc); } -int xhci_dbc_init(struct xhci_hcd *xhci) + +int xhci_create_dbc_dev(struct xhci_hcd *xhci) { struct device *dev; void __iomem *base; @@ -1041,7 +1042,7 @@ int xhci_dbc_init(struct xhci_hcd *xhci) return ret; } -void xhci_dbc_exit(struct xhci_hcd *xhci) +void xhci_remove_dbc_dev(struct xhci_hcd *xhci) { unsigned long flags; diff --git a/drivers/usb/host/xhci-dbgcap.h b/drivers/usb/host/xhci-dbgcap.h index 5d8c7815491cbf..8b5b363a071942 100644 --- a/drivers/usb/host/xhci-dbgcap.h +++ b/drivers/usb/host/xhci-dbgcap.h @@ -194,8 +194,8 @@ static inline struct dbc_ep *get_out_ep(struct xhci_dbc *dbc) } #ifdef CONFIG_USB_XHCI_DBGCAP -int xhci_dbc_init(struct xhci_hcd *xhci); -void xhci_dbc_exit(struct xhci_hcd *xhci); +int xhci_create_dbc_dev(struct xhci_hcd *xhci); +void xhci_remove_dbc_dev(struct xhci_hcd *xhci); int xhci_dbc_tty_probe(struct device *dev, void __iomem *res, struct xhci_hcd *xhci); void xhci_dbc_tty_remove(struct xhci_dbc *dbc); struct xhci_dbc *xhci_alloc_dbc(struct device *dev, void __iomem *res, @@ -211,12 +211,12 @@ int xhci_dbc_suspend(struct xhci_hcd *xhci); int xhci_dbc_resume(struct xhci_hcd *xhci); #endif /* CONFIG_PM */ #else -static inline int xhci_dbc_init(struct xhci_hcd *xhci) +static inline int xhci_create_dbc_dev(struct xhci_hcd *xhci) { return 0; } -static inline void xhci_dbc_exit(struct xhci_hcd *xhci) +static inline void xhci_remove_dbc_dev(struct xhci_hcd *xhci) { } diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 94fe7d64e762b2..a4e99f8668b38d 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -693,7 +693,7 @@ int xhci_run(struct usb_hcd *hcd) xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Finished xhci_run for USB2 roothub"); - xhci_dbc_init(xhci); + xhci_create_dbc_dev(xhci); xhci_debugfs_init(xhci); @@ -723,7 +723,7 @@ static void xhci_stop(struct usb_hcd *hcd) return; } - xhci_dbc_exit(xhci); + xhci_remove_dbc_dev(xhci); spin_lock_irq(&xhci->lock); xhci->xhc_state |= XHCI_STATE_HALTED; From 829baf398f2c93ef1a3f37aefc7655cda7c8c8fd Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Tue, 10 May 2022 14:46:30 +0530 Subject: [PATCH 0907/1056] xhci: Set HCD flag to defer primary roothub registration [ Upstream commit b7a4f9b5d0e4b6dd937678c546c0b322dd1a4054 ] Set "HCD_FLAG_DEFER_RH_REGISTER" to hcd->flags in xhci_run() to defer registering primary roothub in usb_add_hcd() if xhci has two roothubs. This will make sure both primary roothub and secondary roothub will be registered along with the second HCD. This is required for cold plugged USB devices to be detected in certain PCIe USB cards (like Inateck USB card connected to AM64 EVM or J7200 EVM). This patch has been added and reverted earier as it triggered a race in usb device enumeration. That race is now fixed in 5.16-rc3, and in stable back to 5.4 commit 6cca13de26ee ("usb: hub: Fix locking issues with address0_mutex") commit 6ae6dc22d2d1 ("usb: hub: Fix usb enumeration issue due to address0 race") [minor rebase change, and commit message update -Mathias] CC: stable@vger.kernel.org # 5.4+ Suggested-by: Mathias Nyman Tested-by: Chris Chiu Signed-off-by: Kishon Vijay Abraham I Link: https://lore.kernel.org/r/20220510091630.16564-3-kishon@ti.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index a4e99f8668b38d..d76c10f9ad8070 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -693,6 +693,8 @@ int xhci_run(struct usb_hcd *hcd) xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Finished xhci_run for USB2 roothub"); + set_bit(HCD_FLAG_DEFER_RH_REGISTER, &hcd->flags); + xhci_create_dbc_dev(xhci); xhci_debugfs_init(xhci); From 4448327b41738dbfcda680eb4935ff835568f468 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 25 Mar 2022 22:01:43 +0100 Subject: [PATCH 0908/1056] mt76: fix use-after-free by removing a non-RCU wcid pointer [ Upstream commit 51fb1278aa57ae0fc54adaa786e1965362bed4fb ] Fixes an issue caught by KASAN about use-after-free in mt76_txq_schedule by protecting mtxq->wcid with rcu_lock between mt76_txq_schedule and sta_info_[alloc, free]. [18853.876689] ================================================================== [18853.876751] BUG: KASAN: use-after-free in mt76_txq_schedule+0x204/0xaf8 [mt76] [18853.876773] Read of size 8 at addr ffffffaf989a2138 by task mt76-tx phy0/883 [18853.876786] [18853.876810] CPU: 5 PID: 883 Comm: mt76-tx phy0 Not tainted 5.10.100-fix-510-56778d365941-kasan #5 0b01fbbcf41a530f52043508fec2e31a4215 [18853.876840] Call trace: [18853.876861] dump_backtrace+0x0/0x3ec [18853.876878] show_stack+0x20/0x2c [18853.876899] dump_stack+0x11c/0x1ac [18853.876918] print_address_description+0x74/0x514 [18853.876934] kasan_report+0x134/0x174 [18853.876948] __asan_report_load8_noabort+0x44/0x50 [18853.876976] mt76_txq_schedule+0x204/0xaf8 [mt76 074e03e4640e97fe7405ee1fab547b81c4fa45d2] [18853.877002] mt76_txq_schedule_all+0x2c/0x48 [mt76 074e03e4640e97fe7405ee1fab547b81c4fa45d2] [18853.877030] mt7921_tx_worker+0xa0/0x1cc [mt7921_common f0875ebac9d7b4754e1010549e7db50fbd90a047] [18853.877054] __mt76_worker_fn+0x190/0x22c [mt76 074e03e4640e97fe7405ee1fab547b81c4fa45d2] [18853.877071] kthread+0x2f8/0x3b8 [18853.877087] ret_from_fork+0x10/0x30 [18853.877098] [18853.877112] Allocated by task 941: [18853.877131] kasan_save_stack+0x38/0x68 [18853.877147] __kasan_kmalloc+0xd4/0xfc [18853.877163] kasan_kmalloc+0x10/0x1c [18853.877177] __kmalloc+0x264/0x3c4 [18853.877294] sta_info_alloc+0x460/0xf88 [mac80211] [18853.877410] ieee80211_prep_connection+0x204/0x1ee0 [mac80211] [18853.877523] ieee80211_mgd_auth+0x6c4/0xa4c [mac80211] [18853.877635] ieee80211_auth+0x20/0x2c [mac80211] [18853.877733] rdev_auth+0x7c/0x438 [cfg80211] [18853.877826] cfg80211_mlme_auth+0x26c/0x390 [cfg80211] [18853.877919] nl80211_authenticate+0x6d4/0x904 [cfg80211] [18853.877938] genl_rcv_msg+0x748/0x93c [18853.877954] netlink_rcv_skb+0x160/0x2a8 [18853.877969] genl_rcv+0x3c/0x54 [18853.877985] netlink_unicast_kernel+0x104/0x1ec [18853.877999] netlink_unicast+0x178/0x268 [18853.878015] netlink_sendmsg+0x3cc/0x5f0 [18853.878030] sock_sendmsg+0xb4/0xd8 [18853.878043] ____sys_sendmsg+0x2f8/0x53c [18853.878058] ___sys_sendmsg+0xe8/0x150 [18853.878071] __sys_sendmsg+0xc4/0x1f4 [18853.878087] __arm64_compat_sys_sendmsg+0x88/0x9c [18853.878101] el0_svc_common+0x1b4/0x390 [18853.878115] do_el0_svc_compat+0x8c/0xdc [18853.878131] el0_svc_compat+0x10/0x1c [18853.878146] el0_sync_compat_handler+0xa8/0xcc [18853.878161] el0_sync_compat+0x188/0x1c0 [18853.878171] [18853.878183] Freed by task 10927: [18853.878200] kasan_save_stack+0x38/0x68 [18853.878215] kasan_set_track+0x28/0x3c [18853.878228] kasan_set_free_info+0x24/0x48 [18853.878244] __kasan_slab_free+0x11c/0x154 [18853.878259] kasan_slab_free+0x14/0x24 [18853.878273] slab_free_freelist_hook+0xac/0x1b0 [18853.878287] kfree+0x104/0x390 [18853.878402] sta_info_free+0x198/0x210 [mac80211] [18853.878515] __sta_info_destroy_part2+0x230/0x2d4 [mac80211] [18853.878628] __sta_info_flush+0x300/0x37c [mac80211] [18853.878740] ieee80211_set_disassoc+0x2cc/0xa7c [mac80211] [18853.878851] ieee80211_mgd_deauth+0x4a4/0x10a0 [mac80211] [18853.878962] ieee80211_deauth+0x20/0x2c [mac80211] [18853.879057] rdev_deauth+0x7c/0x438 [cfg80211] [18853.879150] cfg80211_mlme_deauth+0x274/0x414 [cfg80211] [18853.879243] cfg80211_mlme_down+0xe4/0x118 [cfg80211] [18853.879335] cfg80211_disconnect+0x218/0x2d8 [cfg80211] [18853.879427] __cfg80211_leave+0x17c/0x240 [cfg80211] [18853.879519] cfg80211_leave+0x3c/0x58 [cfg80211] [18853.879611] wiphy_suspend+0xdc/0x200 [cfg80211] [18853.879628] dpm_run_callback+0x58/0x408 [18853.879642] __device_suspend+0x4cc/0x864 [18853.879658] async_suspend+0x34/0xf4 [18853.879673] async_run_entry_fn+0xe0/0x37c [18853.879689] process_one_work+0x508/0xb98 [18853.879702] worker_thread+0x7f4/0xcd4 [18853.879717] kthread+0x2f8/0x3b8 [18853.879731] ret_from_fork+0x10/0x30 [18853.879741] [18853.879757] The buggy address belongs to the object at ffffffaf989a2000 [18853.879757] which belongs to the cache kmalloc-8k of size 8192 [18853.879774] The buggy address is located 312 bytes inside of [18853.879774] 8192-byte region [ffffffaf989a2000, ffffffaf989a4000) [18853.879787] The buggy address belongs to the page: [18853.879807] page:000000004bda2a59 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1d89a0 [18853.879823] head:000000004bda2a59 order:3 compound_mapcount:0 compound_pincount:0 [18853.879839] flags: 0x8000000000010200(slab|head) [18853.879857] raw: 8000000000010200 ffffffffbc89e208 ffffffffb7fb5208 ffffffaec000cc80 [18853.879873] raw: 0000000000000000 0000000000010001 00000001ffffffff 0000000000000000 [18853.879885] page dumped because: kasan: bad access detected [18853.879896] [18853.879907] Memory state around the buggy address: [18853.879922] ffffffaf989a2000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [18853.879935] ffffffaf989a2080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [18853.879948] >ffffffaf989a2100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [18853.879961] ^ [18853.879973] ffffffaf989a2180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [18853.879986] ffffffaf989a2200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [18853.879998] ================================================================== Cc: stable@vger.kernel.org Reported-by: Sean Wang Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mac80211.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76.h | 2 +- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7615/main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x02_util.c | 4 +++- drivers/net/wireless/mediatek/mt76/mt7915/main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7921/main.c | 2 +- drivers/net/wireless/mediatek/mt76/tx.c | 9 ++++----- 8 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index d03aedc3286bb6..029599d68ca713 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -1100,7 +1100,7 @@ mt76_sta_add(struct mt76_dev *dev, struct ieee80211_vif *vif, continue; mtxq = (struct mt76_txq *)sta->txq[i]->drv_priv; - mtxq->wcid = wcid; + mtxq->wcid = wcid->idx; } ewma_signal_init(&wcid->rssi); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 6e4d697159279a..d1f00706d41ec3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -263,7 +263,7 @@ struct mt76_wcid { }; struct mt76_txq { - struct mt76_wcid *wcid; + u16 wcid; u16 agg_ssn; bool send_bar; diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 7f52a4a11ceaab..0b7b87b4cc21ca 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -74,7 +74,7 @@ mt7603_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) mt7603_wtbl_init(dev, idx, mvif->idx, bc_addr); mtxq = (struct mt76_txq *)vif->txq->drv_priv; - mtxq->wcid = &mvif->sta.wcid; + mtxq->wcid = idx; rcu_assign_pointer(dev->mt76.wcid[idx], &mvif->sta.wcid); out: diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index 60a41d08296117..7c52a4d85ceab4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -235,7 +235,7 @@ static int mt7615_add_interface(struct ieee80211_hw *hw, rcu_assign_pointer(dev->mt76.wcid[idx], &mvif->sta.wcid); if (vif->txq) { mtxq = (struct mt76_txq *)vif->txq->drv_priv; - mtxq->wcid = &mvif->sta.wcid; + mtxq->wcid = idx; } ret = mt7615_mcu_add_dev_info(phy, vif, true); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index ccdbab34127146..db7a4ffcad5587 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -288,7 +288,8 @@ mt76x02_vif_init(struct mt76x02_dev *dev, struct ieee80211_vif *vif, mvif->group_wcid.idx = MT_VIF_WCID(idx); mvif->group_wcid.hw_key_idx = -1; mtxq = (struct mt76_txq *)vif->txq->drv_priv; - mtxq->wcid = &mvif->group_wcid; + rcu_assign_pointer(dev->mt76.wcid[MT_VIF_WCID(idx)], &mvif->group_wcid); + mtxq->wcid = MT_VIF_WCID(idx); } int @@ -341,6 +342,7 @@ void mt76x02_remove_interface(struct ieee80211_hw *hw, struct mt76x02_vif *mvif = (struct mt76x02_vif *)vif->drv_priv; dev->mt76.vif_mask &= ~BIT(mvif->idx); + rcu_assign_pointer(dev->mt76.wcid[mvif->group_wcid.idx], NULL); } EXPORT_SYMBOL_GPL(mt76x02_remove_interface); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c index c25f8da590dd91..6aca470e24013a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c @@ -243,7 +243,7 @@ static int mt7915_add_interface(struct ieee80211_hw *hw, rcu_assign_pointer(dev->mt76.wcid[idx], &mvif->sta.wcid); if (vif->txq) { mtxq = (struct mt76_txq *)vif->txq->drv_priv; - mtxq->wcid = &mvif->sta.wcid; + mtxq->wcid = idx; } if (vif->type != NL80211_IFTYPE_AP && diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 13a7ae3d83516e..6cb65391427f17 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -283,7 +283,7 @@ static int mt7921_add_interface(struct ieee80211_hw *hw, rcu_assign_pointer(dev->mt76.wcid[idx], &mvif->sta.wcid); if (vif->txq) { mtxq = (struct mt76_txq *)vif->txq->drv_priv; - mtxq->wcid = &mvif->sta.wcid; + mtxq->wcid = idx; } out: diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index f0f7a913eaabfe..dce6f6b5f07122 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -406,12 +406,11 @@ mt76_txq_stopped(struct mt76_queue *q) static int mt76_txq_send_burst(struct mt76_phy *phy, struct mt76_queue *q, - struct mt76_txq *mtxq) + struct mt76_txq *mtxq, struct mt76_wcid *wcid) { struct mt76_dev *dev = phy->dev; struct ieee80211_txq *txq = mtxq_to_txq(mtxq); enum mt76_txq_id qid = mt76_txq_get_qid(txq); - struct mt76_wcid *wcid = mtxq->wcid; struct ieee80211_tx_info *info; struct sk_buff *skb; int n_frames = 1; @@ -491,8 +490,8 @@ mt76_txq_schedule_list(struct mt76_phy *phy, enum mt76_txq_id qid) break; mtxq = (struct mt76_txq *)txq->drv_priv; - wcid = mtxq->wcid; - if (wcid && test_bit(MT_WCID_FLAG_PS, &wcid->flags)) + wcid = rcu_dereference(dev->wcid[mtxq->wcid]); + if (!wcid || test_bit(MT_WCID_FLAG_PS, &wcid->flags)) continue; spin_lock_bh(&q->lock); @@ -511,7 +510,7 @@ mt76_txq_schedule_list(struct mt76_phy *phy, enum mt76_txq_id qid) } if (!mt76_txq_stopped(q)) - n_frames = mt76_txq_send_burst(phy, q, mtxq); + n_frames = mt76_txq_send_burst(phy, q, mtxq, wcid); spin_unlock_bh(&q->lock); From c1f6637fe1c2e14edf53fb69f0d0eef2d6e4c0e9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 24 Oct 2021 18:20:26 +0300 Subject: [PATCH 0909/1056] iwlwifi: fw: uefi: add missing include guards [ Upstream commit 91000fdf82195b66350b4f88413c2e8b5f94d994 ] We still don't use #pragma once in the kernel, but even if we did it'd be missing. Add the missing include guards. Signed-off-by: Johannes Berg Fixes: 84c3c9952afb ("iwlwifi: move UEFI code to a separate file") Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211024181719.7fc9988ed49b.I87e300fab664047581e51fb9b02744c75320d08c@changeid Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/uefi.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h index 45d0b36d79b5ae..d552c656ac9fe1 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h @@ -2,7 +2,8 @@ /* * Copyright(c) 2021 Intel Corporation */ - +#ifndef __iwl_fw_uefi__ +#define __iwl_fw_uefi__ #define IWL_UEFI_OEM_PNVM_NAME L"UefiCnvWlanOemSignedPnvm" #define IWL_UEFI_REDUCED_POWER_NAME L"UefiCnvWlanReducedPower" @@ -40,3 +41,5 @@ void *iwl_uefi_get_reduced_power(struct iwl_trans *trans, size_t *len) return ERR_PTR(-EOPNOTSUPP); } #endif /* CONFIG_EFI */ + +#endif /* __iwl_fw_uefi__ */ From 343cee3eafda7e221641723993ed9f6bf407e74a Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:19:27 +0100 Subject: [PATCH 0910/1056] crypto: qat - set to zero DH parameters before free [ Upstream commit 1731160ff7c7bbb11bb1aacb14dd25e18d522779 ] Set to zero the context buffers containing the DH key before they are freed. This is a defense in depth measure that avoids keys to be recovered from memory in case the system is compromised between the free of the buffer and when that area of memory (containing keys) gets overwritten. Cc: stable@vger.kernel.org Fixes: c9839143ebbf ("crypto: qat - Add DH support") Signed-off-by: Giovanni Cabiddu Reviewed-by: Adam Guerin Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index b0b78445418bb4..5633f9df3b6fe3 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -420,14 +420,17 @@ static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh *params) static void qat_dh_clear_ctx(struct device *dev, struct qat_dh_ctx *ctx) { if (ctx->g) { + memset(ctx->g, 0, ctx->p_size); dma_free_coherent(dev, ctx->p_size, ctx->g, ctx->dma_g); ctx->g = NULL; } if (ctx->xa) { + memset(ctx->xa, 0, ctx->p_size); dma_free_coherent(dev, ctx->p_size, ctx->xa, ctx->dma_xa); ctx->xa = NULL; } if (ctx->p) { + memset(ctx->p, 0, ctx->p_size); dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p); ctx->p = NULL; } From f576c7e01a6a9293402523e39827df2ee2001cf1 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:08 +0100 Subject: [PATCH 0911/1056] crypto: qat - use pre-allocated buffers in datapath [ Upstream commit e0831e7af4e03f2715de102e18e9179ec0a81562 ] In order to do DMAs, the QAT device requires that the scatterlist structures are mapped and translated into a format that the firmware can understand. This is defined as the composition of a scatter gather list (SGL) descriptor header, the struct qat_alg_buf_list, plus a variable number of flat buffer descriptors, the struct qat_alg_buf. The allocation and mapping of these data structures is done each time a request is received from the skcipher and aead APIs. In an OOM situation, this behaviour might lead to a dead-lock if an allocation fails. Based on the conversation in [1], increase the size of the aead and skcipher request contexts to include an SGL descriptor that can handle a maximum of 4 flat buffers. If requests exceed 4 entries buffers, memory is allocated dynamically. [1] https://lore.kernel.org/linux-crypto/20200722072932.GA27544@gondor.apana.org.au/ Cc: stable@vger.kernel.org Fixes: d370cec32194 ("crypto: qat - Intel(R) QAT crypto interface") Reported-by: Mikulas Patocka Signed-off-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/qat_algs.c | 64 +++++++++++++--------- drivers/crypto/qat/qat_common/qat_crypto.h | 24 ++++++++ 2 files changed, 61 insertions(+), 27 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index f998ed58457c24..ec635fe44c1f4a 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -46,19 +46,6 @@ static DEFINE_MUTEX(algs_lock); static unsigned int active_devs; -struct qat_alg_buf { - u32 len; - u32 resrvd; - u64 addr; -} __packed; - -struct qat_alg_buf_list { - u64 resrvd; - u32 num_bufs; - u32 num_mapped_bufs; - struct qat_alg_buf bufers[]; -} __packed __aligned(64); - /* Common content descriptor */ struct qat_alg_cd { union { @@ -693,7 +680,10 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst, bl->bufers[i].len, DMA_BIDIRECTIONAL); dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); - kfree(bl); + + if (!qat_req->buf.sgl_src_valid) + kfree(bl); + if (blp != blpout) { /* If out of place operation dma unmap only data */ int bufless = blout->num_bufs - blout->num_mapped_bufs; @@ -704,7 +694,9 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst, DMA_BIDIRECTIONAL); } dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE); - kfree(blout); + + if (!qat_req->buf.sgl_dst_valid) + kfree(blout); } } @@ -721,15 +713,24 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, dma_addr_t blp = DMA_MAPPING_ERROR; dma_addr_t bloutp = DMA_MAPPING_ERROR; struct scatterlist *sg; - size_t sz_out, sz = struct_size(bufl, bufers, n + 1); + size_t sz_out, sz = struct_size(bufl, bufers, n); + int node = dev_to_node(&GET_DEV(inst->accel_dev)); if (unlikely(!n)) return -EINVAL; - bufl = kzalloc_node(sz, GFP_ATOMIC, - dev_to_node(&GET_DEV(inst->accel_dev))); - if (unlikely(!bufl)) - return -ENOMEM; + qat_req->buf.sgl_src_valid = false; + qat_req->buf.sgl_dst_valid = false; + + if (n > QAT_MAX_BUFF_DESC) { + bufl = kzalloc_node(sz, GFP_ATOMIC, node); + if (unlikely(!bufl)) + return -ENOMEM; + } else { + bufl = &qat_req->buf.sgl_src.sgl_hdr; + memset(bufl, 0, sizeof(struct qat_alg_buf_list)); + qat_req->buf.sgl_src_valid = true; + } for_each_sg(sgl, sg, n, i) bufl->bufers[i].addr = DMA_MAPPING_ERROR; @@ -760,12 +761,18 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, struct qat_alg_buf *bufers; n = sg_nents(sglout); - sz_out = struct_size(buflout, bufers, n + 1); + sz_out = struct_size(buflout, bufers, n); sg_nctr = 0; - buflout = kzalloc_node(sz_out, GFP_ATOMIC, - dev_to_node(&GET_DEV(inst->accel_dev))); - if (unlikely(!buflout)) - goto err_in; + + if (n > QAT_MAX_BUFF_DESC) { + buflout = kzalloc_node(sz_out, GFP_ATOMIC, node); + if (unlikely(!buflout)) + goto err_in; + } else { + buflout = &qat_req->buf.sgl_dst.sgl_hdr; + memset(buflout, 0, sizeof(struct qat_alg_buf_list)); + qat_req->buf.sgl_dst_valid = true; + } bufers = buflout->bufers; for_each_sg(sglout, sg, n, i) @@ -810,7 +817,9 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, dma_unmap_single(dev, buflout->bufers[i].addr, buflout->bufers[i].len, DMA_BIDIRECTIONAL); - kfree(buflout); + + if (!qat_req->buf.sgl_dst_valid) + kfree(buflout); err_in: if (!dma_mapping_error(dev, blp)) @@ -823,7 +832,8 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, bufl->bufers[i].len, DMA_BIDIRECTIONAL); - kfree(bufl); + if (!qat_req->buf.sgl_src_valid) + kfree(bufl); dev_err(dev, "Failed to map buf for dma\n"); return -ENOMEM; diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h index b6a4c95ae003f1..0928f159ea9933 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.h +++ b/drivers/crypto/qat/qat_common/qat_crypto.h @@ -21,6 +21,26 @@ struct qat_crypto_instance { atomic_t refctr; }; +#define QAT_MAX_BUFF_DESC 4 + +struct qat_alg_buf { + u32 len; + u32 resrvd; + u64 addr; +} __packed; + +struct qat_alg_buf_list { + u64 resrvd; + u32 num_bufs; + u32 num_mapped_bufs; + struct qat_alg_buf bufers[]; +} __packed; + +struct qat_alg_fixed_buf_list { + struct qat_alg_buf_list sgl_hdr; + struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC]; +} __packed __aligned(64); + struct qat_crypto_request_buffs { struct qat_alg_buf_list *bl; dma_addr_t blp; @@ -28,6 +48,10 @@ struct qat_crypto_request_buffs { dma_addr_t bloutp; size_t sz; size_t sz_out; + bool sgl_src_valid; + bool sgl_dst_valid; + struct qat_alg_fixed_buf_list sgl_src; + struct qat_alg_fixed_buf_list sgl_dst; }; struct qat_crypto_request; From 9cac903b63031a80fedf2bc3c8fbd489d1310876 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:09 +0100 Subject: [PATCH 0912/1056] crypto: qat - refactor submission logic [ Upstream commit af88d3c109aa5edfaa11c9a26d9c0ff21ddf501c ] All the algorithms in qat_algs.c and qat_asym_algs.c use the same pattern to submit messages to the HW queues. Move the submission loop to a new function, qat_alg_send_message(), and share it between the symmetric and the asymmetric algorithms. As part of this rework, since the number of retries before returning an error is inconsistent between the symmetric and asymmetric implementations, set it to a value that works for both (i.e. 20, was 10 in qat_algs.c and 100 in qat_asym_algs.c) In addition fix the return code reported when the HW queues are full. In that case return -ENOSPC instead of -EBUSY. Including stable in CC since (1) the error code returned if the HW queues are full is incorrect and (2) to facilitate the backport of the next fix "crypto: qat - add backlog mechanism". Cc: stable@vger.kernel.org Signed-off-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/Makefile | 1 + drivers/crypto/qat/qat_common/qat_algs.c | 68 +++++++++---------- drivers/crypto/qat/qat_common/qat_algs_send.c | 21 ++++++ drivers/crypto/qat/qat_common/qat_algs_send.h | 10 +++ drivers/crypto/qat/qat_common/qat_asym_algs.c | 50 +++++++++----- drivers/crypto/qat/qat_common/qat_crypto.h | 5 ++ 6 files changed, 101 insertions(+), 54 deletions(-) create mode 100644 drivers/crypto/qat/qat_common/qat_algs_send.c create mode 100644 drivers/crypto/qat/qat_common/qat_algs_send.h diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile index 9c57abdf56b78d..fc477f01621351 100644 --- a/drivers/crypto/qat/qat_common/Makefile +++ b/drivers/crypto/qat/qat_common/Makefile @@ -15,6 +15,7 @@ intel_qat-objs := adf_cfg.o \ qat_crypto.o \ qat_algs.o \ qat_asym_algs.o \ + qat_algs_send.o \ qat_uclo.o \ qat_hal.o diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index ec635fe44c1f4a..6017ae82c7133d 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -17,7 +17,7 @@ #include #include #include "adf_accel_devices.h" -#include "adf_transport.h" +#include "qat_algs_send.h" #include "adf_common_drv.h" #include "qat_crypto.h" #include "icp_qat_hw.h" @@ -939,6 +939,17 @@ void qat_alg_callback(void *resp) qat_req->cb(qat_resp, qat_req); } +static int qat_alg_send_sym_message(struct qat_crypto_request *qat_req, + struct qat_crypto_instance *inst) +{ + struct qat_alg_req req; + + req.fw_req = (u32 *)&qat_req->req; + req.tx_ring = inst->sym_tx; + + return qat_alg_send_message(&req); +} + static int qat_alg_aead_dec(struct aead_request *areq) { struct crypto_aead *aead_tfm = crypto_aead_reqtfm(areq); @@ -949,7 +960,7 @@ static int qat_alg_aead_dec(struct aead_request *areq) struct icp_qat_fw_la_auth_req_params *auth_param; struct icp_qat_fw_la_bulk_req *msg; int digst_size = crypto_aead_authsize(aead_tfm); - int ret, ctr = 0; + int ret; u32 cipher_len; cipher_len = areq->cryptlen - digst_size; @@ -975,15 +986,12 @@ static int qat_alg_aead_dec(struct aead_request *areq) auth_param = (void *)((u8 *)cipher_param + sizeof(*cipher_param)); auth_param->auth_off = 0; auth_param->auth_len = areq->assoclen + cipher_param->cipher_length; - do { - ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg); - } while (ret == -EAGAIN && ctr++ < 10); - if (ret == -EAGAIN) { + ret = qat_alg_send_sym_message(qat_req, ctx->inst); + if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); - return -EBUSY; - } - return -EINPROGRESS; + + return ret; } static int qat_alg_aead_enc(struct aead_request *areq) @@ -996,7 +1004,7 @@ static int qat_alg_aead_enc(struct aead_request *areq) struct icp_qat_fw_la_auth_req_params *auth_param; struct icp_qat_fw_la_bulk_req *msg; u8 *iv = areq->iv; - int ret, ctr = 0; + int ret; if (areq->cryptlen % AES_BLOCK_SIZE != 0) return -EINVAL; @@ -1023,15 +1031,11 @@ static int qat_alg_aead_enc(struct aead_request *areq) auth_param->auth_off = 0; auth_param->auth_len = areq->assoclen + areq->cryptlen; - do { - ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg); - } while (ret == -EAGAIN && ctr++ < 10); - - if (ret == -EAGAIN) { + ret = qat_alg_send_sym_message(qat_req, ctx->inst); + if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); - return -EBUSY; - } - return -EINPROGRESS; + + return ret; } static int qat_alg_skcipher_rekey(struct qat_alg_skcipher_ctx *ctx, @@ -1184,7 +1188,7 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) struct qat_crypto_request *qat_req = skcipher_request_ctx(req); struct icp_qat_fw_la_cipher_req_params *cipher_param; struct icp_qat_fw_la_bulk_req *msg; - int ret, ctr = 0; + int ret; if (req->cryptlen == 0) return 0; @@ -1208,15 +1212,11 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) qat_alg_set_req_iv(qat_req); - do { - ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg); - } while (ret == -EAGAIN && ctr++ < 10); - - if (ret == -EAGAIN) { + ret = qat_alg_send_sym_message(qat_req, ctx->inst); + if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); - return -EBUSY; - } - return -EINPROGRESS; + + return ret; } static int qat_alg_skcipher_blk_encrypt(struct skcipher_request *req) @@ -1253,7 +1253,7 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req) struct qat_crypto_request *qat_req = skcipher_request_ctx(req); struct icp_qat_fw_la_cipher_req_params *cipher_param; struct icp_qat_fw_la_bulk_req *msg; - int ret, ctr = 0; + int ret; if (req->cryptlen == 0) return 0; @@ -1278,15 +1278,11 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req) qat_alg_set_req_iv(qat_req); qat_alg_update_iv(qat_req); - do { - ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg); - } while (ret == -EAGAIN && ctr++ < 10); - - if (ret == -EAGAIN) { + ret = qat_alg_send_sym_message(qat_req, ctx->inst); + if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); - return -EBUSY; - } - return -EINPROGRESS; + + return ret; } static int qat_alg_skcipher_blk_decrypt(struct skcipher_request *req) diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.c b/drivers/crypto/qat/qat_common/qat_algs_send.c new file mode 100644 index 00000000000000..78f1bb8c26c081 --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_algs_send.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) +/* Copyright(c) 2022 Intel Corporation */ +#include "adf_transport.h" +#include "qat_algs_send.h" +#include "qat_crypto.h" + +#define ADF_MAX_RETRIES 20 + +int qat_alg_send_message(struct qat_alg_req *req) +{ + int ret = 0, ctr = 0; + + do { + ret = adf_send_message(req->tx_ring, req->fw_req); + } while (ret == -EAGAIN && ctr++ < ADF_MAX_RETRIES); + + if (ret == -EAGAIN) + return -ENOSPC; + + return -EINPROGRESS; +} diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.h b/drivers/crypto/qat/qat_common/qat_algs_send.h new file mode 100644 index 00000000000000..3fa685d0c293aa --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_algs_send.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef QAT_ALGS_SEND_H +#define QAT_ALGS_SEND_H + +#include "qat_crypto.h" + +int qat_alg_send_message(struct qat_alg_req *req); + +#endif diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 5633f9df3b6fe3..08b8d83e070a0e 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -12,6 +12,7 @@ #include #include "icp_qat_fw_pke.h" #include "adf_accel_devices.h" +#include "qat_algs_send.h" #include "adf_transport.h" #include "adf_common_drv.h" #include "qat_crypto.h" @@ -137,6 +138,17 @@ struct qat_asym_request { void (*cb)(struct icp_qat_fw_pke_resp *resp); } __aligned(64); +static int qat_alg_send_asym_message(struct qat_asym_request *qat_req, + struct qat_crypto_instance *inst) +{ + struct qat_alg_req req; + + req.fw_req = (u32 *)&qat_req->req; + req.tx_ring = inst->pke_tx; + + return qat_alg_send_message(&req); +} + static void qat_dh_cb(struct icp_qat_fw_pke_resp *resp) { struct qat_asym_request *req = (void *)(__force long)resp->opaque; @@ -213,7 +225,7 @@ static int qat_dh_compute_value(struct kpp_request *req) struct qat_asym_request *qat_req = PTR_ALIGN(kpp_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; - int ret, ctr = 0; + int ret; int n_input_params = 0; if (unlikely(!ctx->xa)) @@ -338,13 +350,13 @@ static int qat_dh_compute_value(struct kpp_request *req) msg->input_param_count = n_input_params; msg->output_param_count = 1; - do { - ret = adf_send_message(ctx->inst->pke_tx, (u32 *)msg); - } while (ret == -EBUSY && ctr++ < 100); + ret = qat_alg_send_asym_message(qat_req, ctx->inst); + if (ret == -ENOSPC) + goto unmap_all; - if (!ret) - return -EINPROGRESS; + return ret; +unmap_all: if (!dma_mapping_error(dev, qat_req->phy_out)) dma_unmap_single(dev, qat_req->phy_out, sizeof(struct qat_dh_output_params), @@ -645,7 +657,7 @@ static int qat_rsa_enc(struct akcipher_request *req) struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; - int ret, ctr = 0; + int ret; if (unlikely(!ctx->n || !ctx->e)) return -EINVAL; @@ -735,13 +747,14 @@ static int qat_rsa_enc(struct akcipher_request *req) msg->pke_mid.opaque = (u64)(__force long)qat_req; msg->input_param_count = 3; msg->output_param_count = 1; - do { - ret = adf_send_message(ctx->inst->pke_tx, (u32 *)msg); - } while (ret == -EBUSY && ctr++ < 100); - if (!ret) - return -EINPROGRESS; + ret = qat_alg_send_asym_message(qat_req, ctx->inst); + if (ret == -ENOSPC) + goto unmap_all; + + return ret; +unmap_all: if (!dma_mapping_error(dev, qat_req->phy_out)) dma_unmap_single(dev, qat_req->phy_out, sizeof(struct qat_rsa_output_params), @@ -779,7 +792,7 @@ static int qat_rsa_dec(struct akcipher_request *req) struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; - int ret, ctr = 0; + int ret; if (unlikely(!ctx->n || !ctx->d)) return -EINVAL; @@ -887,13 +900,14 @@ static int qat_rsa_dec(struct akcipher_request *req) msg->input_param_count = 3; msg->output_param_count = 1; - do { - ret = adf_send_message(ctx->inst->pke_tx, (u32 *)msg); - } while (ret == -EBUSY && ctr++ < 100); - if (!ret) - return -EINPROGRESS; + ret = qat_alg_send_asym_message(qat_req, ctx->inst); + if (ret == -ENOSPC) + goto unmap_all; + + return ret; +unmap_all: if (!dma_mapping_error(dev, qat_req->phy_out)) dma_unmap_single(dev, qat_req->phy_out, sizeof(struct qat_rsa_output_params), diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h index 0928f159ea9933..0dcba6fc358c92 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.h +++ b/drivers/crypto/qat/qat_common/qat_crypto.h @@ -9,6 +9,11 @@ #include "adf_accel_devices.h" #include "icp_qat_fw_la.h" +struct qat_alg_req { + u32 *fw_req; + struct adf_etr_ring_data *tx_ring; +}; + struct qat_crypto_instance { struct adf_etr_ring_data *sym_tx; struct adf_etr_ring_data *sym_rx; From ef5594895df27d668ad31979fbed77737e64a611 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:10 +0100 Subject: [PATCH 0913/1056] crypto: qat - add backlog mechanism [ Upstream commit 38682383973280e5be2802ba8a8d4a636d36cb19 ] The implementations of the crypto algorithms (aead, skcipher, etc) in the QAT driver do not properly support requests with the CRYPTO_TFM_REQ_MAY_BACKLOG flag set. If the HW queue is full, the driver returns -EBUSY but does not enqueue the request. This can result in applications like dm-crypt waiting indefinitely for the completion of a request that was never submitted to the hardware. Fix this by adding a software backlog queue: if the ring buffer is more than eighty percent full, then the request is enqueued to a backlog list and the error code -EBUSY is returned back to the caller. Requests in the backlog queue are resubmitted at a later time, in the context of the callback of a previously submitted request. The request for which -EBUSY is returned is then marked as -EINPROGRESS once submitted to the HW queues. The submission loop inside the function qat_alg_send_message() has been modified to decide which submission policy to use based on the request flags. If the request does not have the CRYPTO_TFM_REQ_MAY_BACKLOG set, the previous behaviour has been preserved. Based on a patch by Vishnu Das Ramachandran Cc: stable@vger.kernel.org Fixes: d370cec32194 ("crypto: qat - Intel(R) QAT crypto interface") Reported-by: Mikulas Patocka Reported-by: Kyle Sanderson Signed-off-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/adf_transport.c | 11 +++ drivers/crypto/qat/qat_common/adf_transport.h | 1 + .../qat/qat_common/adf_transport_internal.h | 1 + drivers/crypto/qat/qat_common/qat_algs.c | 24 ++++--- drivers/crypto/qat/qat_common/qat_algs_send.c | 67 ++++++++++++++++++- drivers/crypto/qat/qat_common/qat_algs_send.h | 1 + drivers/crypto/qat/qat_common/qat_asym_algs.c | 23 ++++--- drivers/crypto/qat/qat_common/qat_crypto.c | 3 + drivers/crypto/qat/qat_common/qat_crypto.h | 10 +++ 9 files changed, 123 insertions(+), 18 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_transport.c b/drivers/crypto/qat/qat_common/adf_transport.c index 8ba28409fb74b7..630d0483c4e0a1 100644 --- a/drivers/crypto/qat/qat_common/adf_transport.c +++ b/drivers/crypto/qat/qat_common/adf_transport.c @@ -8,6 +8,9 @@ #include "adf_cfg.h" #include "adf_common_drv.h" +#define ADF_MAX_RING_THRESHOLD 80 +#define ADF_PERCENT(tot, percent) (((tot) * (percent)) / 100) + static inline u32 adf_modulo(u32 data, u32 shift) { u32 div = data >> shift; @@ -77,6 +80,11 @@ static void adf_disable_ring_irq(struct adf_etr_bank_data *bank, u32 ring) bank->irq_mask); } +bool adf_ring_nearly_full(struct adf_etr_ring_data *ring) +{ + return atomic_read(ring->inflights) > ring->threshold; +} + int adf_send_message(struct adf_etr_ring_data *ring, u32 *msg) { struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(ring->bank->accel_dev); @@ -217,6 +225,7 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section, struct adf_etr_bank_data *bank; struct adf_etr_ring_data *ring; char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES]; + int max_inflights; u32 ring_num; int ret; @@ -263,6 +272,8 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section, ring->ring_size = adf_verify_ring_size(msg_size, num_msgs); ring->head = 0; ring->tail = 0; + max_inflights = ADF_MAX_INFLIGHTS(ring->ring_size, ring->msg_size); + ring->threshold = ADF_PERCENT(max_inflights, ADF_MAX_RING_THRESHOLD); atomic_set(ring->inflights, 0); ret = adf_init_ring(ring); if (ret) diff --git a/drivers/crypto/qat/qat_common/adf_transport.h b/drivers/crypto/qat/qat_common/adf_transport.h index 2c95f1697c76fe..e6ef6f9b769133 100644 --- a/drivers/crypto/qat/qat_common/adf_transport.h +++ b/drivers/crypto/qat/qat_common/adf_transport.h @@ -14,6 +14,7 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section, const char *ring_name, adf_callback_fn callback, int poll_mode, struct adf_etr_ring_data **ring_ptr); +bool adf_ring_nearly_full(struct adf_etr_ring_data *ring); int adf_send_message(struct adf_etr_ring_data *ring, u32 *msg); void adf_remove_ring(struct adf_etr_ring_data *ring); #endif diff --git a/drivers/crypto/qat/qat_common/adf_transport_internal.h b/drivers/crypto/qat/qat_common/adf_transport_internal.h index 501bcf0f1809af..8b2c92ba7ca1f0 100644 --- a/drivers/crypto/qat/qat_common/adf_transport_internal.h +++ b/drivers/crypto/qat/qat_common/adf_transport_internal.h @@ -22,6 +22,7 @@ struct adf_etr_ring_data { spinlock_t lock; /* protects ring data struct */ u16 head; u16 tail; + u32 threshold; u8 ring_number; u8 ring_size; u8 msg_size; diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 6017ae82c7133d..873533dc43a741 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -935,19 +935,25 @@ void qat_alg_callback(void *resp) struct icp_qat_fw_la_resp *qat_resp = resp; struct qat_crypto_request *qat_req = (void *)(__force long)qat_resp->opaque_data; + struct qat_instance_backlog *backlog = qat_req->alg_req.backlog; qat_req->cb(qat_resp, qat_req); + + qat_alg_send_backlog(backlog); } static int qat_alg_send_sym_message(struct qat_crypto_request *qat_req, - struct qat_crypto_instance *inst) + struct qat_crypto_instance *inst, + struct crypto_async_request *base) { - struct qat_alg_req req; + struct qat_alg_req *alg_req = &qat_req->alg_req; - req.fw_req = (u32 *)&qat_req->req; - req.tx_ring = inst->sym_tx; + alg_req->fw_req = (u32 *)&qat_req->req; + alg_req->tx_ring = inst->sym_tx; + alg_req->base = base; + alg_req->backlog = &inst->backlog; - return qat_alg_send_message(&req); + return qat_alg_send_message(alg_req); } static int qat_alg_aead_dec(struct aead_request *areq) @@ -987,7 +993,7 @@ static int qat_alg_aead_dec(struct aead_request *areq) auth_param->auth_off = 0; auth_param->auth_len = areq->assoclen + cipher_param->cipher_length; - ret = qat_alg_send_sym_message(qat_req, ctx->inst); + ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base); if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); @@ -1031,7 +1037,7 @@ static int qat_alg_aead_enc(struct aead_request *areq) auth_param->auth_off = 0; auth_param->auth_len = areq->assoclen + areq->cryptlen; - ret = qat_alg_send_sym_message(qat_req, ctx->inst); + ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base); if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); @@ -1212,7 +1218,7 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) qat_alg_set_req_iv(qat_req); - ret = qat_alg_send_sym_message(qat_req, ctx->inst); + ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base); if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); @@ -1278,7 +1284,7 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req) qat_alg_set_req_iv(qat_req); qat_alg_update_iv(qat_req); - ret = qat_alg_send_sym_message(qat_req, ctx->inst); + ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base); if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.c b/drivers/crypto/qat/qat_common/qat_algs_send.c index 78f1bb8c26c081..ff5b4347f7831d 100644 --- a/drivers/crypto/qat/qat_common/qat_algs_send.c +++ b/drivers/crypto/qat/qat_common/qat_algs_send.c @@ -6,7 +6,7 @@ #define ADF_MAX_RETRIES 20 -int qat_alg_send_message(struct qat_alg_req *req) +static int qat_alg_send_message_retry(struct qat_alg_req *req) { int ret = 0, ctr = 0; @@ -19,3 +19,68 @@ int qat_alg_send_message(struct qat_alg_req *req) return -EINPROGRESS; } + +void qat_alg_send_backlog(struct qat_instance_backlog *backlog) +{ + struct qat_alg_req *req, *tmp; + + spin_lock_bh(&backlog->lock); + list_for_each_entry_safe(req, tmp, &backlog->list, list) { + if (adf_send_message(req->tx_ring, req->fw_req)) { + /* The HW ring is full. Do nothing. + * qat_alg_send_backlog() will be invoked again by + * another callback. + */ + break; + } + list_del(&req->list); + req->base->complete(req->base, -EINPROGRESS); + } + spin_unlock_bh(&backlog->lock); +} + +static void qat_alg_backlog_req(struct qat_alg_req *req, + struct qat_instance_backlog *backlog) +{ + INIT_LIST_HEAD(&req->list); + + spin_lock_bh(&backlog->lock); + list_add_tail(&req->list, &backlog->list); + spin_unlock_bh(&backlog->lock); +} + +static int qat_alg_send_message_maybacklog(struct qat_alg_req *req) +{ + struct qat_instance_backlog *backlog = req->backlog; + struct adf_etr_ring_data *tx_ring = req->tx_ring; + u32 *fw_req = req->fw_req; + + /* If any request is already backlogged, then add to backlog list */ + if (!list_empty(&backlog->list)) + goto enqueue; + + /* If ring is nearly full, then add to backlog list */ + if (adf_ring_nearly_full(tx_ring)) + goto enqueue; + + /* If adding request to HW ring fails, then add to backlog list */ + if (adf_send_message(tx_ring, fw_req)) + goto enqueue; + + return -EINPROGRESS; + +enqueue: + qat_alg_backlog_req(req, backlog); + + return -EBUSY; +} + +int qat_alg_send_message(struct qat_alg_req *req) +{ + u32 flags = req->base->flags; + + if (flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + return qat_alg_send_message_maybacklog(req); + else + return qat_alg_send_message_retry(req); +} diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.h b/drivers/crypto/qat/qat_common/qat_algs_send.h index 3fa685d0c293aa..5ce9f4f69d8ff8 100644 --- a/drivers/crypto/qat/qat_common/qat_algs_send.h +++ b/drivers/crypto/qat/qat_common/qat_algs_send.h @@ -6,5 +6,6 @@ #include "qat_crypto.h" int qat_alg_send_message(struct qat_alg_req *req); +void qat_alg_send_backlog(struct qat_instance_backlog *backlog); #endif diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 08b8d83e070a0e..ff7249c093c9b9 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -136,17 +136,21 @@ struct qat_asym_request { } areq; int err; void (*cb)(struct icp_qat_fw_pke_resp *resp); + struct qat_alg_req alg_req; } __aligned(64); static int qat_alg_send_asym_message(struct qat_asym_request *qat_req, - struct qat_crypto_instance *inst) + struct qat_crypto_instance *inst, + struct crypto_async_request *base) { - struct qat_alg_req req; + struct qat_alg_req *alg_req = &qat_req->alg_req; - req.fw_req = (u32 *)&qat_req->req; - req.tx_ring = inst->pke_tx; + alg_req->fw_req = (u32 *)&qat_req->req; + alg_req->tx_ring = inst->pke_tx; + alg_req->base = base; + alg_req->backlog = &inst->backlog; - return qat_alg_send_message(&req); + return qat_alg_send_message(alg_req); } static void qat_dh_cb(struct icp_qat_fw_pke_resp *resp) @@ -350,7 +354,7 @@ static int qat_dh_compute_value(struct kpp_request *req) msg->input_param_count = n_input_params; msg->output_param_count = 1; - ret = qat_alg_send_asym_message(qat_req, ctx->inst); + ret = qat_alg_send_asym_message(qat_req, inst, &req->base); if (ret == -ENOSPC) goto unmap_all; @@ -557,8 +561,11 @@ void qat_alg_asym_callback(void *_resp) { struct icp_qat_fw_pke_resp *resp = _resp; struct qat_asym_request *areq = (void *)(__force long)resp->opaque; + struct qat_instance_backlog *backlog = areq->alg_req.backlog; areq->cb(resp); + + qat_alg_send_backlog(backlog); } #define PKE_RSA_EP_512 0x1c161b21 @@ -748,7 +755,7 @@ static int qat_rsa_enc(struct akcipher_request *req) msg->input_param_count = 3; msg->output_param_count = 1; - ret = qat_alg_send_asym_message(qat_req, ctx->inst); + ret = qat_alg_send_asym_message(qat_req, inst, &req->base); if (ret == -ENOSPC) goto unmap_all; @@ -901,7 +908,7 @@ static int qat_rsa_dec(struct akcipher_request *req) msg->output_param_count = 1; - ret = qat_alg_send_asym_message(qat_req, ctx->inst); + ret = qat_alg_send_asym_message(qat_req, inst, &req->base); if (ret == -ENOSPC) goto unmap_all; diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 3efbb388360100..59e122afa4343d 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -328,6 +328,9 @@ static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev) &inst->pke_rx); if (ret) goto err; + + INIT_LIST_HEAD(&inst->backlog.list); + spin_lock_init(&inst->backlog.lock); } return 0; err: diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h index 0dcba6fc358c92..245b6d9a365078 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.h +++ b/drivers/crypto/qat/qat_common/qat_crypto.h @@ -9,9 +9,17 @@ #include "adf_accel_devices.h" #include "icp_qat_fw_la.h" +struct qat_instance_backlog { + struct list_head list; + spinlock_t lock; /* protects backlog list */ +}; + struct qat_alg_req { u32 *fw_req; struct adf_etr_ring_data *tx_ring; + struct crypto_async_request *base; + struct list_head list; + struct qat_instance_backlog *backlog; }; struct qat_crypto_instance { @@ -24,6 +32,7 @@ struct qat_crypto_instance { unsigned long state; int id; atomic_t refctr; + struct qat_instance_backlog backlog; }; #define QAT_MAX_BUFF_DESC 4 @@ -82,6 +91,7 @@ struct qat_crypto_request { u8 iv[AES_BLOCK_SIZE]; }; bool encryption; + struct qat_alg_req alg_req; }; static inline bool adf_hw_dev_has_crypto(struct adf_accel_dev *accel_dev) From a843925e0287eebb4aa808666bf22c664dfe4c53 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:11 +0100 Subject: [PATCH 0914/1056] crypto: qat - fix memory leak in RSA [ Upstream commit 80a52e1ee7757b742f96bfb0d58f0c14eb6583d0 ] When an RSA key represented in form 2 (as defined in PKCS #1 V2.1) is used, some components of the private key persist even after the TFM is released. Replace the explicit calls to free the buffers in qat_rsa_exit_tfm() with a call to qat_rsa_clear_ctx() which frees all buffers referenced in the TFM context. Cc: stable@vger.kernel.org Fixes: 879f77e9071f ("crypto: qat - Add RSA CRT mode") Signed-off-by: Giovanni Cabiddu Reviewed-by: Adam Guerin Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index ff7249c093c9b9..2bc02c75398ef1 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -1257,18 +1257,8 @@ static void qat_rsa_exit_tfm(struct crypto_akcipher *tfm) struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); struct device *dev = &GET_DEV(ctx->inst->accel_dev); - if (ctx->n) - dma_free_coherent(dev, ctx->key_sz, ctx->n, ctx->dma_n); - if (ctx->e) - dma_free_coherent(dev, ctx->key_sz, ctx->e, ctx->dma_e); - if (ctx->d) { - memset(ctx->d, '\0', ctx->key_sz); - dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d); - } + qat_rsa_clear_ctx(dev, ctx); qat_crypto_put_instance(ctx->inst); - ctx->n = NULL; - ctx->e = NULL; - ctx->d = NULL; } static struct akcipher_alg rsa = { From 6e8606e7ae401251f74c91423fb5bb8e5d11a0c8 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:12 +0100 Subject: [PATCH 0915/1056] crypto: qat - remove dma_free_coherent() for RSA [ Upstream commit 3dfaf0071ed74d7a9c6b3c9ea4df7a6f8e423c2a ] After commit f5ff79fddf0e ("dma-mapping: remove CONFIG_DMA_REMAP"), if the algorithms are enabled, the driver crashes with a BUG_ON while executing vunmap() in the context of a tasklet. This is due to the fact that the function dma_free_coherent() cannot be called in an interrupt context (see Documentation/core-api/dma-api-howto.rst). The functions qat_rsa_enc() and qat_rsa_dec() allocate memory with dma_alloc_coherent() if the source or the destination buffers are made of multiple flat buffers or of a size that is not compatible with the hardware. This memory is then freed with dma_free_coherent() in the context of a tasklet invoked to handle the response for the corresponding request. Replace allocations with dma_alloc_coherent() in the functions qat_rsa_enc() and qat_rsa_dec() with kmalloc() + dma_map_single(). Cc: stable@vger.kernel.org Fixes: a990532023b9 ("crypto: qat - Add support for RSA algorithm") Signed-off-by: Giovanni Cabiddu Reviewed-by: Adam Guerin Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 137 ++++++++---------- 1 file changed, 60 insertions(+), 77 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 2bc02c75398ef1..b31372bddb96e7 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -529,25 +529,22 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL; - if (req->src_align) - dma_free_coherent(dev, req->ctx.rsa->key_sz, req->src_align, - req->in.rsa.enc.m); - else - dma_unmap_single(dev, req->in.rsa.enc.m, req->ctx.rsa->key_sz, - DMA_TO_DEVICE); + kfree_sensitive(req->src_align); + + dma_unmap_single(dev, req->in.rsa.enc.m, req->ctx.rsa->key_sz, + DMA_TO_DEVICE); areq->dst_len = req->ctx.rsa->key_sz; if (req->dst_align) { scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, areq->dst_len, 1); - dma_free_coherent(dev, req->ctx.rsa->key_sz, req->dst_align, - req->out.rsa.enc.c); - } else { - dma_unmap_single(dev, req->out.rsa.enc.c, req->ctx.rsa->key_sz, - DMA_FROM_DEVICE); + kfree_sensitive(req->dst_align); } + dma_unmap_single(dev, req->out.rsa.enc.c, req->ctx.rsa->key_sz, + DMA_FROM_DEVICE); + dma_unmap_single(dev, req->phy_in, sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); dma_unmap_single(dev, req->phy_out, @@ -664,6 +661,7 @@ static int qat_rsa_enc(struct akcipher_request *req) struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; + u8 *vaddr; int ret; if (unlikely(!ctx->n || !ctx->e)) @@ -701,40 +699,39 @@ static int qat_rsa_enc(struct akcipher_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) { qat_req->src_align = NULL; - qat_req->in.rsa.enc.m = dma_map_single(dev, sg_virt(req->src), - req->src_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.enc.m))) - return ret; - + vaddr = sg_virt(req->src); } else { int shift = ctx->key_sz - req->src_len; - qat_req->src_align = dma_alloc_coherent(dev, ctx->key_sz, - &qat_req->in.rsa.enc.m, - GFP_KERNEL); + qat_req->src_align = kzalloc(ctx->key_sz, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; scatterwalk_map_and_copy(qat_req->src_align + shift, req->src, 0, req->src_len, 0); + vaddr = qat_req->src_align; } - if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { - qat_req->dst_align = NULL; - qat_req->out.rsa.enc.c = dma_map_single(dev, sg_virt(req->dst), - req->dst_len, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.enc.c))) - goto unmap_src; + qat_req->in.rsa.enc.m = dma_map_single(dev, vaddr, ctx->key_sz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.enc.m))) + goto unmap_src; + if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { + qat_req->dst_align = NULL; + vaddr = sg_virt(req->dst); } else { - qat_req->dst_align = dma_alloc_coherent(dev, ctx->key_sz, - &qat_req->out.rsa.enc.c, - GFP_KERNEL); + qat_req->dst_align = kzalloc(ctx->key_sz, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; - + vaddr = qat_req->dst_align; } + + qat_req->out.rsa.enc.c = dma_map_single(dev, vaddr, ctx->key_sz, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.enc.c))) + goto unmap_dst; + qat_req->in.rsa.in_tab[3] = 0; qat_req->out.rsa.out_tab[1] = 0; qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.enc.m, @@ -772,21 +769,15 @@ static int qat_rsa_enc(struct akcipher_request *req) sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); unmap_dst: - if (qat_req->dst_align) - dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align, - qat_req->out.rsa.enc.c); - else - if (!dma_mapping_error(dev, qat_req->out.rsa.enc.c)) - dma_unmap_single(dev, qat_req->out.rsa.enc.c, - ctx->key_sz, DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.rsa.enc.c)) + dma_unmap_single(dev, qat_req->out.rsa.enc.c, + ctx->key_sz, DMA_FROM_DEVICE); + kfree_sensitive(qat_req->dst_align); unmap_src: - if (qat_req->src_align) - dma_free_coherent(dev, ctx->key_sz, qat_req->src_align, - qat_req->in.rsa.enc.m); - else - if (!dma_mapping_error(dev, qat_req->in.rsa.enc.m)) - dma_unmap_single(dev, qat_req->in.rsa.enc.m, - ctx->key_sz, DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.rsa.enc.m)) + dma_unmap_single(dev, qat_req->in.rsa.enc.m, ctx->key_sz, + DMA_TO_DEVICE); + kfree_sensitive(qat_req->src_align); return ret; } @@ -799,6 +790,7 @@ static int qat_rsa_dec(struct akcipher_request *req) struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; + u8 *vaddr; int ret; if (unlikely(!ctx->n || !ctx->d)) @@ -846,40 +838,37 @@ static int qat_rsa_dec(struct akcipher_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) { qat_req->src_align = NULL; - qat_req->in.rsa.dec.c = dma_map_single(dev, sg_virt(req->src), - req->dst_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.dec.c))) - return ret; - + vaddr = sg_virt(req->src); } else { int shift = ctx->key_sz - req->src_len; - qat_req->src_align = dma_alloc_coherent(dev, ctx->key_sz, - &qat_req->in.rsa.dec.c, - GFP_KERNEL); + qat_req->src_align = kzalloc(ctx->key_sz, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; scatterwalk_map_and_copy(qat_req->src_align + shift, req->src, 0, req->src_len, 0); + vaddr = qat_req->src_align; } - if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { - qat_req->dst_align = NULL; - qat_req->out.rsa.dec.m = dma_map_single(dev, sg_virt(req->dst), - req->dst_len, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.dec.m))) - goto unmap_src; + qat_req->in.rsa.dec.c = dma_map_single(dev, vaddr, ctx->key_sz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.dec.c))) + goto unmap_src; + if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { + qat_req->dst_align = NULL; + vaddr = sg_virt(req->dst); } else { - qat_req->dst_align = dma_alloc_coherent(dev, ctx->key_sz, - &qat_req->out.rsa.dec.m, - GFP_KERNEL); + qat_req->dst_align = kzalloc(ctx->key_sz, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; - + vaddr = qat_req->dst_align; } + qat_req->out.rsa.dec.m = dma_map_single(dev, vaddr, ctx->key_sz, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.dec.m))) + goto unmap_dst; if (ctx->crt_mode) qat_req->in.rsa.in_tab[6] = 0; @@ -925,21 +914,15 @@ static int qat_rsa_dec(struct akcipher_request *req) sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); unmap_dst: - if (qat_req->dst_align) - dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align, - qat_req->out.rsa.dec.m); - else - if (!dma_mapping_error(dev, qat_req->out.rsa.dec.m)) - dma_unmap_single(dev, qat_req->out.rsa.dec.m, - ctx->key_sz, DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.rsa.dec.m)) + dma_unmap_single(dev, qat_req->out.rsa.dec.m, + ctx->key_sz, DMA_FROM_DEVICE); + kfree_sensitive(qat_req->dst_align); unmap_src: - if (qat_req->src_align) - dma_free_coherent(dev, ctx->key_sz, qat_req->src_align, - qat_req->in.rsa.dec.c); - else - if (!dma_mapping_error(dev, qat_req->in.rsa.dec.c)) - dma_unmap_single(dev, qat_req->in.rsa.dec.c, - ctx->key_sz, DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.rsa.dec.c)) + dma_unmap_single(dev, qat_req->in.rsa.dec.c, ctx->key_sz, + DMA_TO_DEVICE); + kfree_sensitive(qat_req->src_align); return ret; } From 2488286d3e77606f8ab923c2420889d38876ac9b Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:13 +0100 Subject: [PATCH 0916/1056] crypto: qat - remove dma_free_coherent() for DH [ Upstream commit 029aa4624a7fe35233bdd3d1354dc7be260380bf ] The functions qat_dh_compute_value() allocates memory with dma_alloc_coherent() if the source or the destination buffers are made of multiple flat buffers or of a size that is not compatible with the hardware. This memory is then freed with dma_free_coherent() in the context of a tasklet invoked to handle the response for the corresponding request. According to Documentation/core-api/dma-api-howto.rst, the function dma_free_coherent() cannot be called in an interrupt context. Replace allocations with dma_alloc_coherent() in the function qat_dh_compute_value() with kmalloc() + dma_map_single(). Cc: stable@vger.kernel.org Fixes: c9839143ebbf ("crypto: qat - Add DH support") Signed-off-by: Giovanni Cabiddu Reviewed-by: Adam Guerin Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 83 ++++++++----------- 1 file changed, 34 insertions(+), 49 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index b31372bddb96e7..25bbd22085c3d9 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -164,26 +164,21 @@ static void qat_dh_cb(struct icp_qat_fw_pke_resp *resp) err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL; if (areq->src) { - if (req->src_align) - dma_free_coherent(dev, req->ctx.dh->p_size, - req->src_align, req->in.dh.in.b); - else - dma_unmap_single(dev, req->in.dh.in.b, - req->ctx.dh->p_size, DMA_TO_DEVICE); + dma_unmap_single(dev, req->in.dh.in.b, req->ctx.dh->p_size, + DMA_TO_DEVICE); + kfree_sensitive(req->src_align); } areq->dst_len = req->ctx.dh->p_size; if (req->dst_align) { scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, areq->dst_len, 1); - - dma_free_coherent(dev, req->ctx.dh->p_size, req->dst_align, - req->out.dh.r); - } else { - dma_unmap_single(dev, req->out.dh.r, req->ctx.dh->p_size, - DMA_FROM_DEVICE); + kfree_sensitive(req->dst_align); } + dma_unmap_single(dev, req->out.dh.r, req->ctx.dh->p_size, + DMA_FROM_DEVICE); + dma_unmap_single(dev, req->phy_in, sizeof(struct qat_dh_input_params), DMA_TO_DEVICE); dma_unmap_single(dev, req->phy_out, @@ -231,6 +226,7 @@ static int qat_dh_compute_value(struct kpp_request *req) struct icp_qat_fw_pke_request *msg = &qat_req->req; int ret; int n_input_params = 0; + u8 *vaddr; if (unlikely(!ctx->xa)) return -EINVAL; @@ -287,27 +283,24 @@ static int qat_dh_compute_value(struct kpp_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->p_size) { qat_req->src_align = NULL; - qat_req->in.dh.in.b = dma_map_single(dev, - sg_virt(req->src), - req->src_len, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, - qat_req->in.dh.in.b))) - return ret; - + vaddr = sg_virt(req->src); } else { int shift = ctx->p_size - req->src_len; - qat_req->src_align = dma_alloc_coherent(dev, - ctx->p_size, - &qat_req->in.dh.in.b, - GFP_KERNEL); + qat_req->src_align = kzalloc(ctx->p_size, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; scatterwalk_map_and_copy(qat_req->src_align + shift, req->src, 0, req->src_len, 0); + + vaddr = qat_req->src_align; } + + qat_req->in.dh.in.b = dma_map_single(dev, vaddr, ctx->p_size, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->in.dh.in.b))) + goto unmap_src; } /* * dst can be of any size in valid range, but HW expects it to be the @@ -318,20 +311,18 @@ static int qat_dh_compute_value(struct kpp_request *req) */ if (sg_is_last(req->dst) && req->dst_len == ctx->p_size) { qat_req->dst_align = NULL; - qat_req->out.dh.r = dma_map_single(dev, sg_virt(req->dst), - req->dst_len, - DMA_FROM_DEVICE); - - if (unlikely(dma_mapping_error(dev, qat_req->out.dh.r))) - goto unmap_src; - + vaddr = sg_virt(req->dst); } else { - qat_req->dst_align = dma_alloc_coherent(dev, ctx->p_size, - &qat_req->out.dh.r, - GFP_KERNEL); + qat_req->dst_align = kzalloc(ctx->p_size, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; + + vaddr = qat_req->dst_align; } + qat_req->out.dh.r = dma_map_single(dev, vaddr, ctx->p_size, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->out.dh.r))) + goto unmap_dst; qat_req->in.dh.in_tab[n_input_params] = 0; qat_req->out.dh.out_tab[1] = 0; @@ -371,23 +362,17 @@ static int qat_dh_compute_value(struct kpp_request *req) sizeof(struct qat_dh_input_params), DMA_TO_DEVICE); unmap_dst: - if (qat_req->dst_align) - dma_free_coherent(dev, ctx->p_size, qat_req->dst_align, - qat_req->out.dh.r); - else - if (!dma_mapping_error(dev, qat_req->out.dh.r)) - dma_unmap_single(dev, qat_req->out.dh.r, ctx->p_size, - DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.dh.r)) + dma_unmap_single(dev, qat_req->out.dh.r, ctx->p_size, + DMA_FROM_DEVICE); + kfree_sensitive(qat_req->dst_align); unmap_src: if (req->src) { - if (qat_req->src_align) - dma_free_coherent(dev, ctx->p_size, qat_req->src_align, - qat_req->in.dh.in.b); - else - if (!dma_mapping_error(dev, qat_req->in.dh.in.b)) - dma_unmap_single(dev, qat_req->in.dh.in.b, - ctx->p_size, - DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.dh.in.b)) + dma_unmap_single(dev, qat_req->in.dh.in.b, + ctx->p_size, + DMA_TO_DEVICE); + kfree_sensitive(qat_req->src_align); } return ret; } From 4d6d2adce08788b7667a6e58002682ea1bbf6a79 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:14 +0100 Subject: [PATCH 0917/1056] crypto: qat - add param check for RSA [ Upstream commit 9714061423b8b24b8afb31b8eb4df977c63f19c4 ] Reject requests with a source buffer that is bigger than the size of the key. This is to prevent a possible integer underflow that might happen when copying the source scatterlist into a linear buffer. Cc: stable@vger.kernel.org Signed-off-by: Giovanni Cabiddu Reviewed-by: Adam Guerin Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 25bbd22085c3d9..947eeff181b49d 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -656,6 +656,10 @@ static int qat_rsa_enc(struct akcipher_request *req) req->dst_len = ctx->key_sz; return -EOVERFLOW; } + + if (req->src_len > ctx->key_sz) + return -EINVAL; + memset(msg, '\0', sizeof(*msg)); ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, ICP_QAT_FW_COMN_REQ_FLAG_SET); @@ -785,6 +789,10 @@ static int qat_rsa_dec(struct akcipher_request *req) req->dst_len = ctx->key_sz; return -EOVERFLOW; } + + if (req->src_len > ctx->key_sz) + return -EINVAL; + memset(msg, '\0', sizeof(*msg)); ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, ICP_QAT_FW_COMN_REQ_FLAG_SET); From e7f979ed51f96495328157df663c835b17db1e30 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:15 +0100 Subject: [PATCH 0918/1056] crypto: qat - add param check for DH [ Upstream commit 2acbb8771f6ac82422886e63832ee7a0f4b1635b ] Reject requests with a source buffer that is bigger than the size of the key. This is to prevent a possible integer underflow that might happen when copying the source scatterlist into a linear buffer. Cc: stable@vger.kernel.org Signed-off-by: Giovanni Cabiddu Reviewed-by: Adam Guerin Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 947eeff181b49d..7173a2a0a484f0 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -235,6 +235,10 @@ static int qat_dh_compute_value(struct kpp_request *req) req->dst_len = ctx->p_size; return -EOVERFLOW; } + + if (req->src_len > ctx->p_size) + return -EINVAL; + memset(msg, '\0', sizeof(*msg)); ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, ICP_QAT_FW_COMN_REQ_FLAG_SET); From 72e0ec16f17236b860796cbcc38495356d27dedd Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:17 +0100 Subject: [PATCH 0919/1056] crypto: qat - re-enable registration of algorithms [ Upstream commit d09144745959bf7852ccafd73243dd7d1eaeb163 ] Re-enable the registration of algorithms after fixes to (1) use pre-allocated buffers in the datapath and (2) support the CRYPTO_TFM_REQ_MAY_BACKLOG flag. This reverts commit 8893d27ffcaf6ec6267038a177cb87bcde4dd3de. Cc: stable@vger.kernel.org Signed-off-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Reviewed-by: Adam Guerin Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/qat/qat_4xxx/adf_drv.c | 7 ------- drivers/crypto/qat/qat_common/qat_crypto.c | 7 ------- 2 files changed, 14 deletions(-) diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c index 8fd44703115f29..359fb7989dfbfd 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c @@ -52,13 +52,6 @@ static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) if (ret) goto err; - /* Temporarily set the number of crypto instances to zero to avoid - * registering the crypto algorithms. - * This will be removed when the algorithms will support the - * CRYPTO_TFM_REQ_MAY_BACKLOG flag - */ - instances = 0; - for (i = 0; i < instances; i++) { val = i; bank = i * 2; diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 59e122afa4343d..994e43fab0a4de 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -136,13 +136,6 @@ int qat_crypto_dev_config(struct adf_accel_dev *accel_dev) if (ret) goto err; - /* Temporarily set the number of crypto instances to zero to avoid - * registering the crypto algorithms. - * This will be removed when the algorithms will support the - * CRYPTO_TFM_REQ_MAY_BACKLOG flag - */ - instances = 0; - for (i = 0; i < instances; i++) { val = i; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i); From 621c1d8c1bd118fae7f723b3fa3830b76435be2f Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Mon, 4 Apr 2022 11:58:06 +0900 Subject: [PATCH 0920/1056] exfat: fix referencing wrong parent directory information after renaming [ Upstream commit d8dad2588addd1d861ce19e7df3b702330f0c7e3 ] During renaming, the parent directory information maybe updated. But the file/directory still references to the old parent directory information. This bug will cause 2 problems. (1) The renamed file can not be written. [10768.175172] exFAT-fs (sda1): error, failed to bmap (inode : 7afd50e4 iblock : 0, err : -5) [10768.184285] exFAT-fs (sda1): Filesystem has been set read-only ash: write error: Input/output error (2) Some dentries of the renamed file/directory are not set to deleted after removing the file/directory. exfat_update_parent_info() is a workaround for the wrong parent directory information being used after renaming. Now that bug is fixed, this is no longer needed, so remove it. Fixes: 5f2aa075070c ("exfat: add inode operations") Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Daniel Palmer Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon Signed-off-by: Sasha Levin --- fs/exfat/namei.c | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 9d8ada781250bc..939737ba520d01 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -1069,6 +1069,7 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir, exfat_remove_entries(inode, p_dir, oldentry, 0, num_old_entries); + ei->dir = *p_dir; ei->entry = newentry; } else { if (exfat_get_entry_type(epold) == TYPE_FILE) { @@ -1159,28 +1160,6 @@ static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir, return 0; } -static void exfat_update_parent_info(struct exfat_inode_info *ei, - struct inode *parent_inode) -{ - struct exfat_sb_info *sbi = EXFAT_SB(parent_inode->i_sb); - struct exfat_inode_info *parent_ei = EXFAT_I(parent_inode); - loff_t parent_isize = i_size_read(parent_inode); - - /* - * the problem that struct exfat_inode_info caches wrong parent info. - * - * because of flag-mismatch of ei->dir, - * there is abnormal traversing cluster chain. - */ - if (unlikely(parent_ei->flags != ei->dir.flags || - parent_isize != EXFAT_CLU_TO_B(ei->dir.size, sbi) || - parent_ei->start_clu != ei->dir.dir)) { - exfat_chain_set(&ei->dir, parent_ei->start_clu, - EXFAT_B_TO_CLU_ROUND_UP(parent_isize, sbi), - parent_ei->flags); - } -} - /* rename or move a old file into a new file */ static int __exfat_rename(struct inode *old_parent_inode, struct exfat_inode_info *ei, struct inode *new_parent_inode, @@ -1211,8 +1190,6 @@ static int __exfat_rename(struct inode *old_parent_inode, return -ENOENT; } - exfat_update_parent_info(ei, old_parent_inode); - exfat_chain_dup(&olddir, &ei->dir); dentry = ei->entry; @@ -1233,8 +1210,6 @@ static int __exfat_rename(struct inode *old_parent_inode, goto out; } - exfat_update_parent_info(new_ei, new_parent_inode); - p_dir = &(new_ei->dir); new_entry = new_ei->entry; ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh, NULL); From 6107b014163f04252ced73e8171139ab9c8b7804 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 7 Apr 2022 14:56:32 -0400 Subject: [PATCH 0921/1056] tracing: Have event format check not flag %p* on __get_dynamic_array() [ Upstream commit 499f12168aebd6da8fa32c9b7d6203ca9b5eb88d ] The print fmt check against trace events to make sure that the format does not use pointers that may be freed from the time of the trace to the time the event is read, gives a false positive on %pISpc when reading data that was saved in __get_dynamic_array() when it is perfectly fine to do so, as the data being read is on the ring buffer. Link: https://lore.kernel.org/all/20220407144524.2a592ed6@canb.auug.org.au/ Cc: stable@vger.kernel.org Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers") Reported-by: Stephen Rothwell Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace_events.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index c072e8b9849c1c..ea3fbfa87fdd06 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -407,7 +407,14 @@ static void test_event_printk(struct trace_event_call *call) a = strchr(fmt + i, '&'); if ((a && (a < r)) || test_field(r, call)) dereference_flags &= ~(1ULL << arg); + } else if ((r = strstr(fmt + i, "__get_dynamic_array(")) && + (!c || r < c)) { + dereference_flags &= ~(1ULL << arg); + } else if ((r = strstr(fmt + i, "__get_sockaddr(")) && + (!c || r < c)) { + dereference_flags &= ~(1ULL << arg); } + next_arg: i--; arg++; From d9777061727b759e999fb04a302ff50a9fd33225 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 23 Sep 2021 21:03:49 -0400 Subject: [PATCH 0922/1056] tracing: Place trace_pid_list logic into abstract functions [ Upstream commit 6954e415264eeb5ee6be0d22d789ad12c995ee64 ] Instead of having the logic that does trace_pid_list open coded, wrap it in abstract functions. This will allow a rewrite of the logic that implements the trace_pid_list without affecting the users. Note, this causes a change in behavior. Every time a pid is written into the set_*_pid file, it creates a new list and uses RCU to update it. If pid_max is lowered, but there was a pid currently in the list that was higher than pid_max, those pids will now be removed on updating the list. The old behavior kept that from happening. The rewrite of the pid_list logic will no longer depend on pid_max, and will return the old behavior. Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/Makefile | 1 + kernel/trace/ftrace.c | 6 +- kernel/trace/pid_list.c | 160 ++++++++++++++++++++++++++++++++++++ kernel/trace/pid_list.h | 13 +++ kernel/trace/trace.c | 78 ++++++------------ kernel/trace/trace.h | 14 +++- kernel/trace/trace_events.c | 6 +- 7 files changed, 217 insertions(+), 61 deletions(-) create mode 100644 kernel/trace/pid_list.c create mode 100644 kernel/trace/pid_list.h diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 6de5d4d6316588..bedc5caceec706 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -47,6 +47,7 @@ obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_seq.o obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_printk.o +obj-$(CONFIG_TRACING) += pid_list.o obj-$(CONFIG_TRACING_MAP) += tracing_map.o obj-$(CONFIG_PREEMPTIRQ_DELAY_TEST) += preemptirq_delay_test.o obj-$(CONFIG_SYNTH_EVENT_GEN_TEST) += synth_event_gen_test.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 53a1af21d25ce3..e215a9c9697112 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7184,10 +7184,10 @@ static void clear_ftrace_pids(struct trace_array *tr, int type) synchronize_rcu(); if ((type & TRACE_PIDS) && pid_list) - trace_free_pid_list(pid_list); + trace_pid_list_free(pid_list); if ((type & TRACE_NO_PIDS) && no_pid_list) - trace_free_pid_list(no_pid_list); + trace_pid_list_free(no_pid_list); } void ftrace_clear_pids(struct trace_array *tr) @@ -7428,7 +7428,7 @@ pid_write(struct file *filp, const char __user *ubuf, if (filtered_pids) { synchronize_rcu(); - trace_free_pid_list(filtered_pids); + trace_pid_list_free(filtered_pids); } else if (pid_list && !other_pids) { /* Register a probe to set whether to ignore the tracing of a task */ register_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c new file mode 100644 index 00000000000000..4483ef70b5626d --- /dev/null +++ b/kernel/trace/pid_list.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 VMware Inc, Steven Rostedt + */ +#include +#include +#include "trace.h" + +/** + * trace_pid_list_is_set - test if the pid is set in the list + * @pid_list: The pid list to test + * @pid: The pid to to see if set in the list. + * + * Tests if @pid is is set in the @pid_list. This is usually called + * from the scheduler when a task is scheduled. Its pid is checked + * if it should be traced or not. + * + * Return true if the pid is in the list, false otherwise. + */ +bool trace_pid_list_is_set(struct trace_pid_list *pid_list, unsigned int pid) +{ + /* + * If pid_max changed after filtered_pids was created, we + * by default ignore all pids greater than the previous pid_max. + */ + if (pid >= pid_list->pid_max) + return false; + + return test_bit(pid, pid_list->pids); +} + +/** + * trace_pid_list_set - add a pid to the list + * @pid_list: The pid list to add the @pid to. + * @pid: The pid to add. + * + * Adds @pid to @pid_list. This is usually done explicitly by a user + * adding a task to be traced, or indirectly by the fork function + * when children should be traced and a task's pid is in the list. + * + * Return 0 on success, negative otherwise. + */ +int trace_pid_list_set(struct trace_pid_list *pid_list, unsigned int pid) +{ + /* Sorry, but we don't support pid_max changing after setting */ + if (pid >= pid_list->pid_max) + return -EINVAL; + + set_bit(pid, pid_list->pids); + + return 0; +} + +/** + * trace_pid_list_clear - remove a pid from the list + * @pid_list: The pid list to remove the @pid from. + * @pid: The pid to remove. + * + * Removes @pid from @pid_list. This is usually done explicitly by a user + * removing tasks from tracing, or indirectly by the exit function + * when a task that is set to be traced exits. + * + * Return 0 on success, negative otherwise. + */ +int trace_pid_list_clear(struct trace_pid_list *pid_list, unsigned int pid) +{ + /* Sorry, but we don't support pid_max changing after setting */ + if (pid >= pid_list->pid_max) + return -EINVAL; + + clear_bit(pid, pid_list->pids); + + return 0; +} + +/** + * trace_pid_list_next - return the next pid in the list + * @pid_list: The pid list to examine. + * @pid: The pid to start from + * @next: The pointer to place the pid that is set starting from @pid. + * + * Looks for the next consecutive pid that is in @pid_list starting + * at the pid specified by @pid. If one is set (including @pid), then + * that pid is placed into @next. + * + * Return 0 when a pid is found, -1 if there are no more pids included. + */ +int trace_pid_list_next(struct trace_pid_list *pid_list, unsigned int pid, + unsigned int *next) +{ + pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid); + + if (pid < pid_list->pid_max) { + *next = pid; + return 0; + } + return -1; +} + +/** + * trace_pid_list_first - return the first pid in the list + * @pid_list: The pid list to examine. + * @pid: The pointer to place the pid first found pid that is set. + * + * Looks for the first pid that is set in @pid_list, and places it + * into @pid if found. + * + * Return 0 when a pid is found, -1 if there are no pids set. + */ +int trace_pid_list_first(struct trace_pid_list *pid_list, unsigned int *pid) +{ + unsigned int first; + + first = find_first_bit(pid_list->pids, pid_list->pid_max); + + if (first < pid_list->pid_max) { + *pid = first; + return 0; + } + return -1; +} + +/** + * trace_pid_list_alloc - create a new pid_list + * + * Allocates a new pid_list to store pids into. + * + * Returns the pid_list on success, NULL otherwise. + */ +struct trace_pid_list *trace_pid_list_alloc(void) +{ + struct trace_pid_list *pid_list; + + pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL); + if (!pid_list) + return NULL; + + pid_list->pid_max = READ_ONCE(pid_max); + + pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3); + if (!pid_list->pids) { + kfree(pid_list); + return NULL; + } + return pid_list; +} + +/** + * trace_pid_list_free - Frees an allocated pid_list. + * + * Frees the memory for a pid_list that was allocated. + */ +void trace_pid_list_free(struct trace_pid_list *pid_list) +{ + if (!pid_list) + return; + + vfree(pid_list->pids); + kfree(pid_list); +} diff --git a/kernel/trace/pid_list.h b/kernel/trace/pid_list.h new file mode 100644 index 00000000000000..80d0ecfe1536ec --- /dev/null +++ b/kernel/trace/pid_list.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Do not include this file directly. */ + +#ifndef _TRACE_INTERNAL_PID_LIST_H +#define _TRACE_INTERNAL_PID_LIST_H + +struct trace_pid_list { + int pid_max; + unsigned long *pids; +}; + +#endif /* _TRACE_INTERNAL_PID_LIST_H */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f752f2574630b0..abbe8489faae53 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -516,12 +516,6 @@ int call_filter_check_discard(struct trace_event_call *call, void *rec, return 0; } -void trace_free_pid_list(struct trace_pid_list *pid_list) -{ - vfree(pid_list->pids); - kfree(pid_list); -} - /** * trace_find_filtered_pid - check if a pid exists in a filtered_pid list * @filtered_pids: The list of pids to check @@ -532,14 +526,7 @@ void trace_free_pid_list(struct trace_pid_list *pid_list) bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) { - /* - * If pid_max changed after filtered_pids was created, we - * by default ignore all pids greater than the previous pid_max. - */ - if (search_pid >= filtered_pids->pid_max) - return false; - - return test_bit(search_pid, filtered_pids->pids); + return trace_pid_list_is_set(filtered_pids, search_pid); } /** @@ -596,15 +583,11 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list, return; } - /* Sorry, but we don't support pid_max changing after setting */ - if (task->pid >= pid_list->pid_max) - return; - /* "self" is set for forks, and NULL for exits */ if (self) - set_bit(task->pid, pid_list->pids); + trace_pid_list_set(pid_list, task->pid); else - clear_bit(task->pid, pid_list->pids); + trace_pid_list_clear(pid_list, task->pid); } /** @@ -621,18 +604,19 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list, */ void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) { - unsigned long pid = (unsigned long)v; + long pid = (unsigned long)v; + unsigned int next; (*pos)++; /* pid already is +1 of the actual previous bit */ - pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid); + if (trace_pid_list_next(pid_list, pid, &next) < 0) + return NULL; - /* Return pid + 1 to allow zero to be represented */ - if (pid < pid_list->pid_max) - return (void *)(pid + 1); + pid = next; - return NULL; + /* Return pid + 1 to allow zero to be represented */ + return (void *)(pid + 1); } /** @@ -649,12 +633,14 @@ void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos) { unsigned long pid; + unsigned int first; loff_t l = 0; - pid = find_first_bit(pid_list->pids, pid_list->pid_max); - if (pid >= pid_list->pid_max) + if (trace_pid_list_first(pid_list, &first) < 0) return NULL; + pid = first; + /* Return pid + 1 so that zero can be the exit value */ for (pid++; pid && l < *pos; pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l)) @@ -690,7 +676,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, unsigned long val; int nr_pids = 0; ssize_t read = 0; - ssize_t ret = 0; + ssize_t ret; loff_t pos; pid_t pid; @@ -703,34 +689,23 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, * the user. If the operation fails, then the current list is * not modified. */ - pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL); + pid_list = trace_pid_list_alloc(); if (!pid_list) { trace_parser_put(&parser); return -ENOMEM; } - pid_list->pid_max = READ_ONCE(pid_max); - - /* Only truncating will shrink pid_max */ - if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max) - pid_list->pid_max = filtered_pids->pid_max; - - pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3); - if (!pid_list->pids) { - trace_parser_put(&parser); - kfree(pid_list); - return -ENOMEM; - } - if (filtered_pids) { /* copy the current bits to the new max */ - for_each_set_bit(pid, filtered_pids->pids, - filtered_pids->pid_max) { - set_bit(pid, pid_list->pids); + ret = trace_pid_list_first(filtered_pids, &pid); + while (!ret) { + trace_pid_list_set(pid_list, pid); + ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); nr_pids++; } } + ret = 0; while (cnt > 0) { pos = 0; @@ -746,12 +721,13 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, ret = -EINVAL; if (kstrtoul(parser.buffer, 0, &val)) break; - if (val >= pid_list->pid_max) - break; pid = (pid_t)val; - set_bit(pid, pid_list->pids); + if (trace_pid_list_set(pid_list, pid) < 0) { + ret = -1; + break; + } nr_pids++; trace_parser_clear(&parser); @@ -760,13 +736,13 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, trace_parser_put(&parser); if (ret < 0) { - trace_free_pid_list(pid_list); + trace_pid_list_free(pid_list); return ret; } if (!nr_pids) { /* Cleared the list of pids */ - trace_free_pid_list(pid_list); + trace_pid_list_free(pid_list); read = ret; pid_list = NULL; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 421374c304fc04..d6763366a3206f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -22,6 +22,8 @@ #include #include +#include "pid_list.h" + #ifdef CONFIG_FTRACE_SYSCALLS #include /* For NR_SYSCALLS */ #include /* some archs define it here */ @@ -191,10 +193,14 @@ struct trace_options { struct trace_option_dentry *topts; }; -struct trace_pid_list { - int pid_max; - unsigned long *pids; -}; +struct trace_pid_list *trace_pid_list_alloc(void); +void trace_pid_list_free(struct trace_pid_list *pid_list); +bool trace_pid_list_is_set(struct trace_pid_list *pid_list, unsigned int pid); +int trace_pid_list_set(struct trace_pid_list *pid_list, unsigned int pid); +int trace_pid_list_clear(struct trace_pid_list *pid_list, unsigned int pid); +int trace_pid_list_first(struct trace_pid_list *pid_list, unsigned int *pid); +int trace_pid_list_next(struct trace_pid_list *pid_list, unsigned int pid, + unsigned int *next); enum { TRACE_PIDS = BIT(0), diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ea3fbfa87fdd06..c4f654efb77afb 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -900,10 +900,10 @@ static void __ftrace_clear_event_pids(struct trace_array *tr, int type) tracepoint_synchronize_unregister(); if ((type & TRACE_PIDS) && pid_list) - trace_free_pid_list(pid_list); + trace_pid_list_free(pid_list); if ((type & TRACE_NO_PIDS) && no_pid_list) - trace_free_pid_list(no_pid_list); + trace_pid_list_free(no_pid_list); } static void ftrace_clear_event_pids(struct trace_array *tr, int type) @@ -1982,7 +1982,7 @@ event_pid_write(struct file *filp, const char __user *ubuf, if (filtered_pids) { tracepoint_synchronize_unregister(); - trace_free_pid_list(filtered_pids); + trace_pid_list_free(filtered_pids); } else if (pid_list && !other_pids) { register_pid_events(tr); } From d8413b16feee83d9e00fbfe332ec401726079fe7 Mon Sep 17 00:00:00 2001 From: Wonhyuk Yang Date: Tue, 3 May 2022 14:05:46 +0900 Subject: [PATCH 0923/1056] tracing: Fix return value of trace_pid_write() [ Upstream commit b27f266f74fbda4ee36c2b2b04d15992860cf23b ] Setting set_event_pid with trailing whitespace lead to endless write system calls like below. $ strace echo "123 " > /sys/kernel/debug/tracing/set_event_pid execve("/usr/bin/echo", ["echo", "123 "], ...) = 0 ... write(1, "123 \n", 5) = 4 write(1, "\n", 1) = 0 write(1, "\n", 1) = 0 write(1, "\n", 1) = 0 write(1, "\n", 1) = 0 write(1, "\n", 1) = 0 .... This is because, the result of trace_get_user's are not returned when it read at least one pid. To fix it, update read variable even if parser->idx == 0. The result of applied patch is below. $ strace echo "123 " > /sys/kernel/debug/tracing/set_event_pid execve("/usr/bin/echo", ["echo", "123 "], ...) = 0 ... write(1, "123 \n", 5) = 5 close(1) = 0 Link: https://lkml.kernel.org/r/20220503050546.288911-1-vvghjk1234@gmail.com Cc: Ingo Molnar Cc: Baik Song An Cc: Hong Yeon Kim Cc: Taeung Song Cc: linuxgeek@linuxgeek.io Cc: stable@vger.kernel.org Fixes: 4909010788640 ("tracing: Add set_event_pid directory for future use") Signed-off-by: Wonhyuk Yang Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index abbe8489faae53..d93f9c59f50e5f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -711,13 +711,16 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, pos = 0; ret = trace_get_user(&parser, ubuf, cnt, &pos); - if (ret < 0 || !trace_parser_loaded(&parser)) + if (ret < 0) break; read += ret; ubuf += ret; cnt -= ret; + if (!trace_parser_loaded(&parser)) + break; + ret = -EINVAL; if (kstrtoul(parser.buffer, 0, &val)) break; @@ -743,7 +746,6 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, if (!nr_pids) { /* Cleared the list of pids */ trace_pid_list_free(pid_list); - read = ret; pid_list = NULL; } From 38a28bb80f001fb5c11504d94680214a56983ebe Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 21 Dec 2021 10:04:46 +0100 Subject: [PATCH 0924/1056] um: virtio_uml: Allow probing from devicetree [ Upstream commit db0dd9cee82270e032123169ceff659eced5115d ] Allow the virtio_uml device to be probed from the devicetree so that sub-devices can be specified using the standard virtio bindings, for example: virtio@1 { compatible = "virtio,uml"; socket-path = "i2c.sock"; virtio-device-id = <0x22>; i2c-controller { compatible = "virtio,device22"; #address-cells = <0x01>; #size-cells = <0x00>; light-sensor@01 { compatible = "ti,opt3001"; reg = <0x01>; }; }; }; Signed-off-by: Vincent Whitchurch Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- arch/um/drivers/virtio_uml.c | 50 +++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 7755cb4ff9fc6d..ba562d68dc0489 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -21,6 +21,7 @@ * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd. */ #include +#include #include #include #include @@ -49,6 +50,7 @@ struct virtio_uml_platform_data { struct virtio_uml_device { struct virtio_device vdev; struct platform_device *pdev; + struct virtio_uml_platform_data *pdata; spinlock_t sock_lock; int sock, req_fd, irq; @@ -149,7 +151,7 @@ static int vhost_user_recv(struct virtio_uml_device *vu_dev, if (rc == -ECONNRESET && vu_dev->registered) { struct virtio_uml_platform_data *pdata; - pdata = vu_dev->pdev->dev.platform_data; + pdata = vu_dev->pdata; virtio_break_device(&vu_dev->vdev); schedule_work(&pdata->conn_broken_wk); @@ -1115,21 +1117,63 @@ void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev, no_vq_suspend ? "dis" : "en"); } +static void vu_of_conn_broken(struct work_struct *wk) +{ + /* + * We can't remove the device from the devicetree so the only thing we + * can do is warn. + */ + WARN_ON(1); +} + /* Platform device */ +static struct virtio_uml_platform_data * +virtio_uml_create_pdata(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct virtio_uml_platform_data *pdata; + int ret; + + if (!np) + return ERR_PTR(-EINVAL); + + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return ERR_PTR(-ENOMEM); + + INIT_WORK(&pdata->conn_broken_wk, vu_of_conn_broken); + pdata->pdev = pdev; + + ret = of_property_read_string(np, "socket-path", &pdata->socket_path); + if (ret) + return ERR_PTR(ret); + + ret = of_property_read_u32(np, "virtio-device-id", + &pdata->virtio_device_id); + if (ret) + return ERR_PTR(ret); + + return pdata; +} + static int virtio_uml_probe(struct platform_device *pdev) { struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; struct virtio_uml_device *vu_dev; int rc; - if (!pdata) - return -EINVAL; + if (!pdata) { + pdata = virtio_uml_create_pdata(pdev); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + } vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL); if (!vu_dev) return -ENOMEM; + vu_dev->pdata = pdata; vu_dev->vdev.dev.parent = &pdev->dev; vu_dev->vdev.dev.release = virtio_uml_release_dev; vu_dev->vdev.config = &virtio_uml_config_ops; From 59f132fda50d7b617b0c2910b27428b037209245 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 17 May 2022 22:52:50 +0200 Subject: [PATCH 0925/1056] um: virtio_uml: Fix broken device handling in time-travel [ Upstream commit af9fb41ed315ce95f659f0b10b4d59a71975381d ] If a device implementation crashes, virtio_uml will mark it as dead by calling virtio_break_device() and scheduling the work that will remove it. This still seems like the right thing to do, but it's done directly while reading the message, and if time-travel is used, this is in the time-travel handler, outside of the normal Linux machinery. Therefore, we cannot acquire locks or do normal "linux-y" things because e.g. lockdep will be confused about the context. Move handling this situation out of the read function and into the actual IRQ handler and response handling instead, so that in the case of time-travel we don't call it in the wrong context. Chances are the system will still crash immediately, since the device implementation crashing may also cause the time- travel controller to go down, but at least all of that now happens without strange warnings from lockdep. Fixes: c8177aba37ca ("um: time-travel: rework interrupt handling in ext mode") Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- arch/um/drivers/virtio_uml.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index ba562d68dc0489..82ff3785bf69f9 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -63,6 +63,7 @@ struct virtio_uml_device { u8 config_changed_irq:1; uint64_t vq_irq_vq_map; + int recv_rc; }; struct virtio_uml_vq_info { @@ -148,14 +149,6 @@ static int vhost_user_recv(struct virtio_uml_device *vu_dev, rc = vhost_user_recv_header(fd, msg); - if (rc == -ECONNRESET && vu_dev->registered) { - struct virtio_uml_platform_data *pdata; - - pdata = vu_dev->pdata; - - virtio_break_device(&vu_dev->vdev); - schedule_work(&pdata->conn_broken_wk); - } if (rc) return rc; size = msg->header.size; @@ -164,6 +157,21 @@ static int vhost_user_recv(struct virtio_uml_device *vu_dev, return full_read(fd, &msg->payload, size, false); } +static void vhost_user_check_reset(struct virtio_uml_device *vu_dev, + int rc) +{ + struct virtio_uml_platform_data *pdata = vu_dev->pdata; + + if (rc != -ECONNRESET) + return; + + if (!vu_dev->registered) + return; + + virtio_break_device(&vu_dev->vdev); + schedule_work(&pdata->conn_broken_wk); +} + static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev, struct vhost_user_msg *msg, size_t max_payload_size) @@ -171,8 +179,10 @@ static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev, int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg, max_payload_size, true); - if (rc) + if (rc) { + vhost_user_check_reset(vu_dev, rc); return rc; + } if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION)) return -EPROTO; @@ -369,6 +379,7 @@ static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev, sizeof(msg.msg.payload) + sizeof(msg.extra_payload)); + vu_dev->recv_rc = rc; if (rc) return IRQ_NONE; @@ -412,7 +423,9 @@ static irqreturn_t vu_req_interrupt(int irq, void *data) if (!um_irq_timetravel_handler_used()) ret = vu_req_read_message(vu_dev, NULL); - if (vu_dev->vq_irq_vq_map) { + if (vu_dev->recv_rc) { + vhost_user_check_reset(vu_dev, vu_dev->recv_rc); + } else if (vu_dev->vq_irq_vq_map) { struct virtqueue *vq; virtio_device_for_each_vq((&vu_dev->vdev), vq) { From a4e8071be33756e8c5b19224359ff6221129f66b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 3 Sep 2021 15:27:29 -0700 Subject: [PATCH 0926/1056] Bluetooth: Add bt_skb_sendmsg helper commit 38f64f650dc0e44c146ff88d15a7339efa325918 upstream. bt_skb_sendmsg helps takes care of allocation the skb and copying the the contents of msg over to the skb while checking for possible errors so it should be safe to call it without holding lock_sock. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann Cc: Harshit Mogalapalli Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/bluetooth.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 3fecc4a411a13a..83eaa42896bb27 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -422,6 +422,34 @@ static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, return NULL; } +/* Shall not be called with lock_sock held */ +static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk, + struct msghdr *msg, + size_t len, size_t mtu, + size_t headroom, size_t tailroom) +{ + struct sk_buff *skb; + size_t size = min_t(size_t, len, mtu); + int err; + + skb = bt_skb_send_alloc(sk, size + headroom + tailroom, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + return ERR_PTR(err); + + skb_reserve(skb, headroom); + skb_tailroom_reserve(skb, mtu, tailroom); + + if (!copy_from_iter_full(skb_put(skb, size), size, &msg->msg_iter)) { + kfree_skb(skb); + return ERR_PTR(-EFAULT); + } + + skb->priority = sk->sk_priority; + + return skb; +} + int bt_to_errno(u16 code); void hci_sock_set_flag(struct sock *sk, int nr); From cb7ed8c7fe5b20091c2297f7d3e2d36ebcd95672 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 3 Sep 2021 15:27:30 -0700 Subject: [PATCH 0927/1056] Bluetooth: Add bt_skb_sendmmsg helper commit 97e4e80299844bb5f6ce5a7540742ffbffae3d97 upstream. This works similarly to bt_skb_sendmsg but can split the msg into multiple skb fragments which is useful for stream sockets. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann Cc: Harshit Mogalapalli Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/bluetooth.h | 38 +++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 83eaa42896bb27..3275d573728512 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -450,6 +450,44 @@ static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk, return skb; } +/* Similar to bt_skb_sendmsg but can split the msg into multiple fragments + * accourding to the MTU. + */ +static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, + struct msghdr *msg, + size_t len, size_t mtu, + size_t headroom, size_t tailroom) +{ + struct sk_buff *skb, **frag; + + skb = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); + if (IS_ERR_OR_NULL(skb)) + return skb; + + len -= skb->len; + if (!len) + return skb; + + /* Add remaining data over MTU as continuation fragments */ + frag = &skb_shinfo(skb)->frag_list; + while (len) { + struct sk_buff *tmp; + + tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); + if (IS_ERR_OR_NULL(tmp)) { + kfree_skb(skb); + return tmp; + } + + len -= tmp->len; + + *frag = tmp; + frag = &(*frag)->next; + } + + return skb; +} + int bt_to_errno(u16 code); void hci_sock_set_flag(struct sock *sk, int nr); From 1864e820a5ac4747845247497d9daf8d87c9c8c4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 3 Sep 2021 15:27:31 -0700 Subject: [PATCH 0928/1056] Bluetooth: SCO: Replace use of memcpy_from_msg with bt_skb_sendmsg commit 0771cbb3b97d3c1d68eecd7f00055f599954c34e upstream. This makes use of bt_skb_sendmsg instead of allocating a different buffer to be used with memcpy_from_msg which cause one extra copy. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann Cc: Harshit Mogalapalli Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/sco.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index c7b43c75677fcc..3ed90c5fbebb14 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -280,27 +280,19 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk) return err; } -static int sco_send_frame(struct sock *sk, void *buf, int len, - unsigned int msg_flags) +static int sco_send_frame(struct sock *sk, struct sk_buff *skb) { struct sco_conn *conn = sco_pi(sk)->conn; - struct sk_buff *skb; - int err; /* Check outgoing MTU */ - if (len > conn->mtu) + if (skb->len > conn->mtu) return -EINVAL; - BT_DBG("sk %p len %d", sk, len); + BT_DBG("sk %p len %d", sk, skb->len); - skb = bt_skb_send_alloc(sk, len, msg_flags & MSG_DONTWAIT, &err); - if (!skb) - return err; - - memcpy(skb_put(skb, len), buf, len); hci_send_sco(conn->hcon, skb); - return len; + return skb->len; } static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) @@ -727,7 +719,7 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; - void *buf; + struct sk_buff *skb; int err; BT_DBG("sock %p, sk %p", sock, sk); @@ -739,24 +731,20 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; - buf = kmalloc(len, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - if (memcpy_from_msg(buf, msg, len)) { - kfree(buf); - return -EFAULT; - } + skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0); + if (IS_ERR_OR_NULL(skb)) + return PTR_ERR(skb); lock_sock(sk); if (sk->sk_state == BT_CONNECTED) - err = sco_send_frame(sk, buf, len, msg->msg_flags); + err = sco_send_frame(sk, skb); else err = -ENOTCONN; release_sock(sk); - kfree(buf); + if (err) + kfree_skb(skb); return err; } From 367becefb8a1b4512d6ec4d6dc7e84006f8ac773 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 3 Sep 2021 15:27:32 -0700 Subject: [PATCH 0929/1056] Bluetooth: RFCOMM: Replace use of memcpy_from_msg with bt_skb_sendmmsg commit 81be03e026dc0c16dc1c64e088b2a53b73caa895 upstream. This makes use of bt_skb_sendmmsg instead using memcpy_from_msg which is not considered safe to be used when lock_sock is held. Also make rfcomm_dlc_send handle skb with fragments and queue them all atomically. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann Cc: Harshit Mogalapalli Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/rfcomm/core.c | 50 +++++++++++++++++++++++++++++++------ net/bluetooth/rfcomm/sock.c | 46 ++++++++-------------------------- 2 files changed, 53 insertions(+), 43 deletions(-) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index f2bacb464ccf3b..7324764384b677 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -549,22 +549,58 @@ struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel) return dlc; } +static int rfcomm_dlc_send_frag(struct rfcomm_dlc *d, struct sk_buff *frag) +{ + int len = frag->len; + + BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len); + + if (len > d->mtu) + return -EINVAL; + + rfcomm_make_uih(frag, d->addr); + __skb_queue_tail(&d->tx_queue, frag); + + return len; +} + int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb) { - int len = skb->len; + unsigned long flags; + struct sk_buff *frag, *next; + int len; if (d->state != BT_CONNECTED) return -ENOTCONN; - BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len); + frag = skb_shinfo(skb)->frag_list; + skb_shinfo(skb)->frag_list = NULL; - if (len > d->mtu) - return -EINVAL; + /* Queue all fragments atomically. */ + spin_lock_irqsave(&d->tx_queue.lock, flags); - rfcomm_make_uih(skb, d->addr); - skb_queue_tail(&d->tx_queue, skb); + len = rfcomm_dlc_send_frag(d, skb); + if (len < 0 || !frag) + goto unlock; + + for (; frag; frag = next) { + int ret; + + next = frag->next; + + ret = rfcomm_dlc_send_frag(d, frag); + if (ret < 0) { + kfree_skb(frag); + goto unlock; + } + + len += ret; + } + +unlock: + spin_unlock_irqrestore(&d->tx_queue.lock, flags); - if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags)) + if (len > 0 && !test_bit(RFCOMM_TX_THROTTLED, &d->flags)) rfcomm_schedule(); return len; } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 2c95bb58f901a5..5938af3e9936e0 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -575,46 +575,20 @@ static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); sent = bt_sock_wait_ready(sk, msg->msg_flags); - if (sent) - goto done; - - while (len) { - size_t size = min_t(size_t, len, d->mtu); - int err; - - skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE, - msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) { - if (sent == 0) - sent = err; - break; - } - skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE); - - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err) { - kfree_skb(skb); - if (sent == 0) - sent = err; - break; - } - skb->priority = sk->sk_priority; + release_sock(sk); - err = rfcomm_dlc_send(d, skb); - if (err < 0) { - kfree_skb(skb); - if (sent == 0) - sent = err; - break; - } + if (sent) + return sent; - sent += size; - len -= size; - } + skb = bt_skb_sendmmsg(sk, msg, len, d->mtu, RFCOMM_SKB_HEAD_RESERVE, + RFCOMM_SKB_TAIL_RESERVE); + if (IS_ERR_OR_NULL(skb)) + return PTR_ERR(skb); -done: - release_sock(sk); + sent = rfcomm_dlc_send(d, skb); + if (sent < 0) + kfree_skb(skb); return sent; } From 5ae749f40d0166be68d845781fe61bfcfa147ba4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 16 Sep 2021 13:10:48 -0700 Subject: [PATCH 0930/1056] Bluetooth: Fix passing NULL to PTR_ERR commit 266191aa8d14b84958aaeb5e96ee4e97839e3d87 upstream. Passing NULL to PTR_ERR will result in 0 (success), also since the likes of bt_skb_sendmsg does never return NULL it is safe to replace the instances of IS_ERR_OR_NULL with IS_ERR when checking its return. Reported-by: Dan Carpenter Tested-by: Tedd Ho-Jeong An Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann Cc: Harshit Mogalapalli Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/bluetooth.h | 2 +- net/bluetooth/rfcomm/sock.c | 2 +- net/bluetooth/sco.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 3275d573728512..b85e6d9ba39f5a 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -474,7 +474,7 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, struct sk_buff *tmp; tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); - if (IS_ERR_OR_NULL(tmp)) { + if (IS_ERR(tmp)) { kfree_skb(skb); return tmp; } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 5938af3e9936e0..4bf4ea6cbb5eee 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -583,7 +583,7 @@ static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, skb = bt_skb_sendmmsg(sk, msg, len, d->mtu, RFCOMM_SKB_HEAD_RESERVE, RFCOMM_SKB_TAIL_RESERVE); - if (IS_ERR_OR_NULL(skb)) + if (IS_ERR(skb)) return PTR_ERR(skb); sent = rfcomm_dlc_send(d, skb); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 3ed90c5fbebb14..17ca65fb6c50d7 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -732,7 +732,7 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, return -EOPNOTSUPP; skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0); - if (IS_ERR_OR_NULL(skb)) + if (IS_ERR(skb)) return PTR_ERR(skb); lock_sock(sk); From d01605a01f0103fc8c05ac211ba08d452f9d1fa1 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 16 Sep 2021 13:10:49 -0700 Subject: [PATCH 0931/1056] Bluetooth: SCO: Fix sco_send_frame returning skb->len commit 037ce005af6b8a3e40ee07c6e9266c8997e6a4d6 upstream. The skb in modified by hci_send_sco which pushes SCO headers thus changing skb->len causing sco_sock_sendmsg to fail. Fixes: 0771cbb3b97d ("Bluetooth: SCO: Replace use of memcpy_from_msg with bt_skb_sendmsg") Tested-by: Tedd Ho-Jeong An Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann Cc: Harshit Mogalapalli Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/sco.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 17ca65fb6c50d7..9a8814d4565a0b 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -283,16 +283,17 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk) static int sco_send_frame(struct sock *sk, struct sk_buff *skb) { struct sco_conn *conn = sco_pi(sk)->conn; + int len = skb->len; /* Check outgoing MTU */ - if (skb->len > conn->mtu) + if (len > conn->mtu) return -EINVAL; - BT_DBG("sk %p len %d", sk, skb->len); + BT_DBG("sk %p len %d", sk, len); hci_send_sco(conn->hcon, skb); - return skb->len; + return len; } static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) @@ -743,7 +744,8 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, err = -ENOTCONN; release_sock(sk); - if (err) + + if (err < 0) kfree_skb(skb); return err; } From 000473ac997a46f787a9beaa55110e4b2d21b9f2 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 14 Feb 2022 17:59:38 -0800 Subject: [PATCH 0932/1056] Bluetooth: Fix bt_skb_sendmmsg not allocating partial chunks commit 29fb608396d6a62c1b85acc421ad7a4399085b9f upstream. Since bt_skb_sendmmsg can be used with the likes of SOCK_STREAM it shall return the partial chunks it could allocate instead of freeing everything as otherwise it can cause problems like bellow. Fixes: 81be03e026dc ("Bluetooth: RFCOMM: Replace use of memcpy_from_msg with bt_skb_sendmmsg") Reported-by: Paul Menzel Link: https://lore.kernel.org/r/d7206e12-1b99-c3be-84f4-df22af427ef5@molgen.mpg.de BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215594 Signed-off-by: Luiz Augusto von Dentz Tested-by: Paul Menzel (Nokia N9 (MeeGo/Harmattan) Signed-off-by: Marcel Holtmann Cc: Harshit Mogalapalli Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/bluetooth.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index b85e6d9ba39f5a..355835639ae583 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -475,8 +475,7 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); if (IS_ERR(tmp)) { - kfree_skb(skb); - return tmp; + return skb; } len -= tmp->len; From 4b4b1f8dfeb7814aa69666d9a15769bb15e5a36a Mon Sep 17 00:00:00 2001 From: Sungjong Seo Date: Wed, 8 Jun 2022 00:05:21 +0900 Subject: [PATCH 0933/1056] exfat: use updated exfat_chain directly during renaming commit 204e6ceaa1035cb7b92b156517e88842ebb4c7ff upstream. In order for a file to access its own directory entry set, exfat_inode_info(ei) has two copied values. One is ei->dir, which is a snapshot of exfat_chain of the parent directory, and the other is ei->entry, which is the offset of the start of the directory entry set in the parent directory. Since the parent directory can be updated after the snapshot point, it should be used only for accessing one's own directory entry set. However, as of now, during renaming, it could try to traverse or to allocate clusters via snapshot values, it does not make sense. This potential problem has been revealed when exfat_update_parent_info() was removed by commit d8dad2588add ("exfat: fix referencing wrong parent directory information after renaming"). However, I don't think it's good idea to bring exfat_update_parent_info() back. Instead, let's use the updated exfat_chain of parent directory diectly. Fixes: d8dad2588add ("exfat: fix referencing wrong parent directory information after renaming") Reported-by: Wang Yugui Signed-off-by: Sungjong Seo Tested-by: Wang Yugui Signed-off-by: Namjae Jeon Signed-off-by: Greg Kroah-Hartman --- fs/exfat/namei.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 939737ba520d01..8a7f4c0830f3d7 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -1190,7 +1190,9 @@ static int __exfat_rename(struct inode *old_parent_inode, return -ENOENT; } - exfat_chain_dup(&olddir, &ei->dir); + exfat_chain_set(&olddir, EXFAT_I(old_parent_inode)->start_clu, + EXFAT_B_TO_CLU_ROUND_UP(i_size_read(old_parent_inode), sbi), + EXFAT_I(old_parent_inode)->flags); dentry = ei->entry; ep = exfat_get_dentry(sb, &olddir, dentry, &old_bh, NULL); From b3f16976b9ed7afafa2caf12ef5539fafc34d48d Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 10 Dec 2021 15:04:05 -0800 Subject: [PATCH 0934/1056] drm/amd/display: Reset DMCUB before HW init commit 791255ca9fbe38042cfd55df5deb116dc11fef18 upstream. [Why] If the firmware wasn't reset by PSP or HW and is currently running then the firmware will hang or perform underfined behavior when we modify its firmware state underneath it. [How] Reset DMCUB before setting up cache windows and performing HW init. Reviewed-by: Aurabindo Jayamohanan Pillai Acked-by: Pavle Kotarac Tested-by: Daniel Wheeler Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7150afacbc4f51..3b4449bf4d85f6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1028,6 +1028,11 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) return 0; } + /* Reset DMCUB if it was previously running - before we overwrite its memory. */ + status = dmub_srv_hw_reset(dmub_srv); + if (status != DMUB_STATUS_OK) + DRM_WARN("Error resetting DMUB HW: %d\n", status); + hdr = (const struct dmcub_firmware_header_v1_0 *)dmub_fw->data; fw_inst_const = dmub_fw->data + From 9581511948582a0ed8da03fb91d2ab658087283d Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 19 Aug 2021 16:36:09 -0400 Subject: [PATCH 0935/1056] drm/amd/display: Optimize bandwidth on following fast update commit 34316c1e561db0b24e341029f04a5a5bead9a7bc upstream. [Why] The current call to optimize_bandwidth never occurs because flip is always pending from the FULL and FAST updates. [How] Optimize on the following flip when it's a FAST update and we know we aren't going to be modifying the clocks again. Reviewed-by: Bhawanpreet Lakha Acked-by: Mikita Lipski Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/core/dc.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index b37c4d2e7a1e09..46bef17399206e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1788,6 +1788,11 @@ void dc_post_update_surfaces_to_stream(struct dc *dc) post_surface_trace(dc); + if (dc->ctx->dce_version >= DCE_VERSION_MAX) + TRACE_DCN_CLOCK_STATE(&context->bw_ctx.bw.dcn.clk); + else + TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce); + if (is_flip_pending_in_pipes(dc, context)) return; @@ -2974,6 +2979,9 @@ void dc_commit_updates_for_stream(struct dc *dc, if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state) new_pipe->plane_state->force_full_update = true; } + } else if (update_type == UPDATE_TYPE_FAST) { + /* Previous frame finished and HW is ready for optimization. */ + dc_post_update_surfaces_to_stream(dc); } @@ -3030,15 +3038,6 @@ void dc_commit_updates_for_stream(struct dc *dc, pipe_ctx->plane_state->force_full_update = false; } } - /*let's use current_state to update watermark etc*/ - if (update_type >= UPDATE_TYPE_FULL) { - dc_post_update_surfaces_to_stream(dc); - - if (dc_ctx->dce_version >= DCE_VERSION_MAX) - TRACE_DCN_CLOCK_STATE(&context->bw_ctx.bw.dcn.clk); - else - TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce); - } return; From c2b484d784c8dfc3eb62fd4f9ff11515b6e78e19 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 12 Oct 2021 10:04:03 -0400 Subject: [PATCH 0936/1056] drm/amd/display: Fix surface optimization regression on Carrizo commit 62e5a7e2333a9f5395f6a9db766b7b06c949fe7a upstream. [Why] DCE legacy optimization path isn't well tested under new DC optimization flow which can result in underflow occuring when initializing X11 on Carrizo. [How] Retain the legacy optimization flow for DCE and keep the new one for DCN to satisfy optimizations being correctly applied for ASIC that can support it. Fixes: 34316c1e561db0 ("drm/amd/display: Optimize bandwidth on following fast update") Reported-by: Tom St Denis Tested-by: Tom St Denis Reviewed-by: Harry Wentland Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/core/dc.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 46bef17399206e..1bde9d4e82d4bb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2979,8 +2979,13 @@ void dc_commit_updates_for_stream(struct dc *dc, if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state) new_pipe->plane_state->force_full_update = true; } - } else if (update_type == UPDATE_TYPE_FAST) { - /* Previous frame finished and HW is ready for optimization. */ + } else if (update_type == UPDATE_TYPE_FAST && dc_ctx->dce_version >= DCE_VERSION_MAX) { + /* + * Previous frame finished and HW is ready for optimization. + * + * Only relevant for DCN behavior where we can guarantee the optimization + * is safe to apply - retain the legacy behavior for DCE. + */ dc_post_update_surfaces_to_stream(dc); } @@ -3039,6 +3044,12 @@ void dc_commit_updates_for_stream(struct dc *dc, } } + /* Legacy optimization path for DCE. */ + if (update_type >= UPDATE_TYPE_FULL && dc_ctx->dce_version < DCE_VERSION_MAX) { + dc_post_update_surfaces_to_stream(dc); + TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce); + } + return; } From 8842d5d70713896ac95423ce795366b350b1b0d7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Jul 2022 13:41:37 +0200 Subject: [PATCH 0937/1056] x86/amd: Use IBPB for firmware calls commit 28a99e95f55c61855983d36a88c05c178d966bb7 upstream. On AMD IBRS does not prevent Retbleed; as such use IBPB before a firmware call to flush the branch history state. And because in order to do an EFI call, the kernel maps a whole lot of the kernel page table into the EFI page table, do an IBPB just in case in order to prevent the scenario of poisoning the BTB and causing an EFI call using the unprotected RET there. [ bp: Massage. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220715194550.793957-1-cascardo@canonical.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/nospec-branch.h | 2 ++ arch/x86/kernel/cpu/bugs.c | 11 ++++++++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 3781a7f489ef37..d370718e222bab 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -300,6 +300,7 @@ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ +#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 0a87b2bc4ef945..9a79b96e55214f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -298,6 +298,8 @@ do { \ alternative_msr_write(MSR_IA32_SPEC_CTRL, \ spec_ctrl_current() | SPEC_CTRL_IBRS, \ X86_FEATURE_USE_IBRS_FW); \ + alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \ + X86_FEATURE_USE_IBPB_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 752941879792f0..18a7ea1cffdaca 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1512,7 +1512,16 @@ static void __init spectre_v2_select_mitigation(void) * the CPU supports Enhanced IBRS, kernel might un-intentionally not * enable IBRS around firmware calls. */ - if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { + if (boot_cpu_has_bug(X86_BUG_RETBLEED) && + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) { + + if (retbleed_cmd != RETBLEED_CMD_IBPB) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB_FW); + pr_info("Enabling Speculation Barrier for firmware calls\n"); + } + + } else if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } From 49338b651f5a6ba0863296678735f0d7bb75c1ed Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 13 Jul 2022 14:38:19 -0700 Subject: [PATCH 0938/1056] x86/alternative: Report missing return thunk details commit 65cdf0d623bedf0e069bb64ed52e8bb20105e2ba upstream. Debugging missing return thunks is easier if we can see where they're happening. Suggested-by: Peter Zijlstra Signed-off-by: Kees Cook Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/lkml/Ys66hwtFcGbYmoiZ@hirez.programming.kicks-ass.net/ Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/alternative.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 8ed9ccf53b6239..98a8b59f87f360 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -554,7 +554,9 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) dest = addr + insn.length + insn.immediate.value; if (__static_call_fixup(addr, op, dest) || - WARN_ON_ONCE(dest != &__x86_return_thunk)) + WARN_ONCE(dest != &__x86_return_thunk, + "missing return thunk: %pS-%pS: %*ph", + addr, dest, 5, addr)) continue; DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", From ba3a8af8a21a81cfd0c8c689a81261caba934f97 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 19 Jul 2022 11:09:01 -0700 Subject: [PATCH 0939/1056] watchqueue: make sure to serialize 'wqueue->defunct' properly commit 353f7988dd8413c47718f7ca79c030b6fb62cfe5 upstream. When the pipe is closed, we mark the associated watchqueue defunct by calling watch_queue_clear(). However, while that is protected by the watchqueue lock, new watchqueue entries aren't actually added under that lock at all: they use the pipe->rd_wait.lock instead, and looking up that pipe happens without any locking. The watchqueue code uses the RCU read-side section to make sure that the wqueue entry itself hasn't disappeared, but that does not protect the pipe_info in any way. So make sure to actually hold the wqueue lock when posting watch events, properly serializing against the pipe being torn down. Reported-by: Noam Rathaus Cc: Greg KH Cc: David Howells Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/watch_queue.c | 53 +++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index b1ae7c9c3b473d..3df760b9a775fe 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -34,6 +34,27 @@ MODULE_LICENSE("GPL"); #define WATCH_QUEUE_NOTE_SIZE 128 #define WATCH_QUEUE_NOTES_PER_PAGE (PAGE_SIZE / WATCH_QUEUE_NOTE_SIZE) +/* + * This must be called under the RCU read-lock, which makes + * sure that the wqueue still exists. It can then take the lock, + * and check that the wqueue hasn't been destroyed, which in + * turn makes sure that the notification pipe still exists. + */ +static inline bool lock_wqueue(struct watch_queue *wqueue) +{ + spin_lock_bh(&wqueue->lock); + if (unlikely(wqueue->defunct)) { + spin_unlock_bh(&wqueue->lock); + return false; + } + return true; +} + +static inline void unlock_wqueue(struct watch_queue *wqueue) +{ + spin_unlock_bh(&wqueue->lock); +} + static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { @@ -69,6 +90,10 @@ static const struct pipe_buf_operations watch_queue_pipe_buf_ops = { /* * Post a notification to a watch queue. + * + * Must be called with the RCU lock for reading, and the + * watch_queue lock held, which guarantees that the pipe + * hasn't been released. */ static bool post_one_notification(struct watch_queue *wqueue, struct watch_notification *n) @@ -85,9 +110,6 @@ static bool post_one_notification(struct watch_queue *wqueue, spin_lock_irq(&pipe->rd_wait.lock); - if (wqueue->defunct) - goto out; - mask = pipe->ring_size - 1; head = pipe->head; tail = pipe->tail; @@ -203,7 +225,10 @@ void __post_watch_notification(struct watch_list *wlist, if (security_post_notification(watch->cred, cred, n) < 0) continue; - post_one_notification(wqueue, n); + if (lock_wqueue(wqueue)) { + post_one_notification(wqueue, n); + unlock_wqueue(wqueue);; + } } rcu_read_unlock(); @@ -465,11 +490,12 @@ int add_watch_to_object(struct watch *watch, struct watch_list *wlist) return -EAGAIN; } - spin_lock_bh(&wqueue->lock); - kref_get(&wqueue->usage); - kref_get(&watch->usage); - hlist_add_head(&watch->queue_node, &wqueue->watches); - spin_unlock_bh(&wqueue->lock); + if (lock_wqueue(wqueue)) { + kref_get(&wqueue->usage); + kref_get(&watch->usage); + hlist_add_head(&watch->queue_node, &wqueue->watches); + unlock_wqueue(wqueue); + } hlist_add_head(&watch->list_node, &wlist->watchers); return 0; @@ -523,20 +549,15 @@ int remove_watch_from_object(struct watch_list *wlist, struct watch_queue *wq, wqueue = rcu_dereference(watch->queue); - /* We don't need the watch list lock for the next bit as RCU is - * protecting *wqueue from deallocation. - */ - if (wqueue) { + if (lock_wqueue(wqueue)) { post_one_notification(wqueue, &n.watch); - spin_lock_bh(&wqueue->lock); - if (!hlist_unhashed(&watch->queue_node)) { hlist_del_init_rcu(&watch->queue_node); put_watch(watch); } - spin_unlock_bh(&wqueue->lock); + unlock_wqueue(wqueue); } if (wlist->release_watch) { From ea255921c4b5ea04090fdca9bd9ae1b81f613a07 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 22 Nov 2021 12:16:46 +0100 Subject: [PATCH 0940/1056] tty: drivers/tty/, stop using tty_schedule_flip() commit 5f6a85158ccacc3f09744b3aafe8b11ab3b6c6f6 upstream. Since commit a9c3f68f3cd8d (tty: Fix low_latency BUG) in 2014, tty_flip_buffer_push() is only a wrapper to tty_schedule_flip(). We are going to remove the latter (as it is used less), so call the former in drivers/tty/. Cc: Vladimir Zapolskiy Reviewed-by: Johan Hovold Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211122111648.30379-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/goldfish.c | 2 +- drivers/tty/moxa.c | 4 ++-- drivers/tty/serial/lpc32xx_hs.c | 2 +- drivers/tty/vt/keyboard.c | 6 +++--- drivers/tty/vt/vt.c | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c index 0e32920af10d3a..7ca3cd8eb5742e 100644 --- a/drivers/tty/goldfish.c +++ b/drivers/tty/goldfish.c @@ -151,7 +151,7 @@ static irqreturn_t goldfish_tty_interrupt(int irq, void *dev_id) address = (unsigned long)(void *)buf; goldfish_tty_rw(qtty, address, count, 0); - tty_schedule_flip(&qtty->port); + tty_flip_buffer_push(&qtty->port); return IRQ_HANDLED; } diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c index bf17e90858b8cd..a29ec5a938396f 100644 --- a/drivers/tty/moxa.c +++ b/drivers/tty/moxa.c @@ -1383,7 +1383,7 @@ static int moxa_poll_port(struct moxa_port *p, unsigned int handle, if (inited && !tty_throttled(tty) && MoxaPortRxQueue(p) > 0) { /* RX */ MoxaPortReadData(p); - tty_schedule_flip(&p->port); + tty_flip_buffer_push(&p->port); } } else { clear_bit(EMPTYWAIT, &p->statusflags); @@ -1408,7 +1408,7 @@ static int moxa_poll_port(struct moxa_port *p, unsigned int handle, if (tty && (intr & IntrBreak) && !I_IGNBRK(tty)) { /* BREAK */ tty_insert_flip_char(&p->port, 0, TTY_BREAK); - tty_schedule_flip(&p->port); + tty_flip_buffer_push(&p->port); } if (intr & IntrLine) diff --git a/drivers/tty/serial/lpc32xx_hs.c b/drivers/tty/serial/lpc32xx_hs.c index b199d785996118..07c4161eb4cc2c 100644 --- a/drivers/tty/serial/lpc32xx_hs.c +++ b/drivers/tty/serial/lpc32xx_hs.c @@ -341,7 +341,7 @@ static irqreturn_t serial_lpc32xx_interrupt(int irq, void *dev_id) LPC32XX_HSUART_IIR(port->membase)); port->icount.overrun++; tty_insert_flip_char(tport, 0, TTY_OVERRUN); - tty_schedule_flip(tport); + tty_flip_buffer_push(tport); } /* Data received? */ diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index c7fbbcdcc34615..3700cd057f2736 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -324,13 +324,13 @@ int kbd_rate(struct kbd_repeat *rpt) static void put_queue(struct vc_data *vc, int ch) { tty_insert_flip_char(&vc->port, ch, 0); - tty_schedule_flip(&vc->port); + tty_flip_buffer_push(&vc->port); } static void puts_queue(struct vc_data *vc, const char *cp) { tty_insert_flip_string(&vc->port, cp, strlen(cp)); - tty_schedule_flip(&vc->port); + tty_flip_buffer_push(&vc->port); } static void applkey(struct vc_data *vc, int key, char mode) @@ -584,7 +584,7 @@ static void fn_inc_console(struct vc_data *vc) static void fn_send_intr(struct vc_data *vc) { tty_insert_flip_char(&vc->port, 0, TTY_BREAK); - tty_schedule_flip(&vc->port); + tty_flip_buffer_push(&vc->port); } static void fn_scroll_forw(struct vc_data *vc) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 55283a7f973fb9..dfc1f4b445f3bf 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1833,7 +1833,7 @@ static void csi_m(struct vc_data *vc) static void respond_string(const char *p, size_t len, struct tty_port *port) { tty_insert_flip_string(port, p, len); - tty_schedule_flip(port); + tty_flip_buffer_push(port); } static void cursor_report(struct vc_data *vc, struct tty_struct *tty) From 6219f5b54ad8f0c5c8ae50c17166e67193366aa1 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 22 Nov 2021 12:16:47 +0100 Subject: [PATCH 0941/1056] tty: the rest, stop using tty_schedule_flip() commit b68b914494df4f79b4e9b58953110574af1cb7a2 upstream. Since commit a9c3f68f3cd8d (tty: Fix low_latency BUG) in 2014, tty_flip_buffer_push() is only a wrapper to tty_schedule_flip(). We are going to remove the latter (as it is used less), so call the former in the rest of the users. Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: William Hubbs Cc: Chris Brannon Cc: Kirk Reiser Cc: Samuel Thibault Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Alexander Gordeev Reviewed-by: Johan Hovold Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211122111648.30379-3-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/srmcons.c | 2 +- drivers/accessibility/speakup/spk_ttyio.c | 4 ++-- drivers/s390/char/keyboard.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index 90635ef5dafac4..6dc952b0df4a9a 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -59,7 +59,7 @@ srmcons_do_receive_chars(struct tty_port *port) } while((result.bits.status & 1) && (++loops < 10)); if (count) - tty_schedule_flip(port); + tty_flip_buffer_push(port); return count; } diff --git a/drivers/accessibility/speakup/spk_ttyio.c b/drivers/accessibility/speakup/spk_ttyio.c index 0d1f397cd8961f..08cf8a17754bb6 100644 --- a/drivers/accessibility/speakup/spk_ttyio.c +++ b/drivers/accessibility/speakup/spk_ttyio.c @@ -88,7 +88,7 @@ static int spk_ttyio_receive_buf2(struct tty_struct *tty, } if (!ldisc_data->buf_free) - /* ttyio_in will tty_schedule_flip */ + /* ttyio_in will tty_flip_buffer_push */ return 0; /* Make sure the consumer has read buf before we have seen @@ -312,7 +312,7 @@ static unsigned char ttyio_in(struct spk_synth *in_synth, int timeout) mb(); ldisc_data->buf_free = true; /* Let TTY push more characters */ - tty_schedule_flip(tty->port); + tty_flip_buffer_push(tty->port); return rv; } diff --git a/drivers/s390/char/keyboard.h b/drivers/s390/char/keyboard.h index c467589c7f452f..c06d399b9b1f15 100644 --- a/drivers/s390/char/keyboard.h +++ b/drivers/s390/char/keyboard.h @@ -56,7 +56,7 @@ static inline void kbd_put_queue(struct tty_port *port, int ch) { tty_insert_flip_char(port, ch, 0); - tty_schedule_flip(port); + tty_flip_buffer_push(port); } static inline void @@ -64,5 +64,5 @@ kbd_puts_queue(struct tty_port *port, char *cp) { while (*cp) tty_insert_flip_char(port, *cp++, 0); - tty_schedule_flip(port); + tty_flip_buffer_push(port); } From 35545303454a0a711db2f61eae693647e3a87b4b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 22 Nov 2021 12:16:48 +0100 Subject: [PATCH 0942/1056] tty: drop tty_schedule_flip() commit 5db96ef23bda6c2a61a51693c85b78b52d03f654 upstream. Since commit a9c3f68f3cd8d (tty: Fix low_latency BUG) in 2014, tty_flip_buffer_push() is only a wrapper to tty_schedule_flip(). All users were converted in the previous patches, so remove tty_schedule_flip() completely while inlining its body into tty_flip_buffer_push(). One less exported function. Reviewed-by: Johan Hovold Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211122111648.30379-4-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_buffer.c | 30 ++++++++---------------------- include/linux/tty_flip.h | 1 - 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 6127f84b92b163..d63c1f6c7ef450 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -402,27 +402,6 @@ int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag) } EXPORT_SYMBOL(__tty_insert_flip_char); -/** - * tty_schedule_flip - push characters to ldisc - * @port: tty port to push from - * - * Takes any pending buffers and transfers their ownership to the - * ldisc side of the queue. It then schedules those characters for - * processing by the line discipline. - */ - -void tty_schedule_flip(struct tty_port *port) -{ - struct tty_bufhead *buf = &port->buf; - - /* paired w/ acquire in flush_to_ldisc(); ensures - * flush_to_ldisc() sees buffer data. - */ - smp_store_release(&buf->tail->commit, buf->tail->used); - queue_work(system_unbound_wq, &buf->work); -} -EXPORT_SYMBOL(tty_schedule_flip); - /** * tty_prepare_flip_string - make room for characters * @port: tty port @@ -567,7 +546,14 @@ static void flush_to_ldisc(struct work_struct *work) void tty_flip_buffer_push(struct tty_port *port) { - tty_schedule_flip(port); + struct tty_bufhead *buf = &port->buf; + + /* + * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees + * buffer data. + */ + smp_store_release(&buf->tail->commit, buf->tail->used); + queue_work(system_unbound_wq, &buf->work); } EXPORT_SYMBOL(tty_flip_buffer_push); diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index 32284992b31a00..1fb727b7b969a6 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -17,7 +17,6 @@ extern int tty_insert_flip_string_fixed_flag(struct tty_port *port, extern int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars, size_t size); extern void tty_flip_buffer_push(struct tty_port *port); -void tty_schedule_flip(struct tty_port *port); int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag); static inline int tty_insert_flip_char(struct tty_port *port, From 816c301b6a739ae0a78b566248acc0bc81fe05c7 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 7 Jul 2022 10:25:57 +0200 Subject: [PATCH 0943/1056] tty: extract tty_flip_buffer_commit() from tty_flip_buffer_push() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 716b10580283fda66f2b88140e3964f8a7f9da89 upstream. We will need this new helper in the next patch. Cc: Hillf Danton Cc: 一只狗 Cc: Dan Carpenter Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220707082558.9250-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_buffer.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index d63c1f6c7ef450..76fb600c1f7933 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -533,6 +533,15 @@ static void flush_to_ldisc(struct work_struct *work) } +static inline void tty_flip_buffer_commit(struct tty_buffer *tail) +{ + /* + * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees + * buffer data. + */ + smp_store_release(&tail->commit, tail->used); +} + /** * tty_flip_buffer_push - terminal * @port: tty port to push @@ -548,11 +557,7 @@ void tty_flip_buffer_push(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; - /* - * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees - * buffer data. - */ - smp_store_release(&buf->tail->commit, buf->tail->used); + tty_flip_buffer_commit(buf->tail); queue_work(system_unbound_wq, &buf->work); } EXPORT_SYMBOL(tty_flip_buffer_push); From b2d1e4cd558cffec6bfe318f5d74e6cffc374d29 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 7 Jul 2022 10:25:58 +0200 Subject: [PATCH 0944/1056] tty: use new tty_insert_flip_string_and_push_buffer() in pty_write() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a501ab75e7624d133a5a3c7ec010687c8b961d23 upstream. There is a race in pty_write(). pty_write() can be called in parallel with e.g. ioctl(TIOCSTI) or ioctl(TCXONC) which also inserts chars to the buffer. Provided, tty_flip_buffer_push() in pty_write() is called outside the lock, it can commit inconsistent tail. This can lead to out of bounds writes and other issues. See the Link below. To fix this, we have to introduce a new helper called tty_insert_flip_string_and_push_buffer(). It does both tty_insert_flip_string() and tty_flip_buffer_commit() under the port lock. It also calls queue_work(), but outside the lock. See 71a174b39f10 (pty: do tty_flip_buffer_push without port->lock in pty_write) for the reasons. Keep the helper internal-only (in drivers' tty.h). It is not intended to be used widely. Link: https://seclists.org/oss-sec/2022/q2/155 Fixes: 71a174b39f10 (pty: do tty_flip_buffer_push without port->lock in pty_write) Cc: 一只狗 Cc: Dan Carpenter Suggested-by: Hillf Danton Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220707082558.9250-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/pty.c | 14 ++------------ drivers/tty/tty.h | 3 +++ drivers/tty/tty_buffer.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 12 deletions(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 74bfabe5b45381..752dab3356d72b 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -111,21 +111,11 @@ static void pty_unthrottle(struct tty_struct *tty) static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c) { struct tty_struct *to = tty->link; - unsigned long flags; - if (tty->flow.stopped) + if (tty->flow.stopped || !c) return 0; - if (c > 0) { - spin_lock_irqsave(&to->port->lock, flags); - /* Stuff the data into the input queue of the other end */ - c = tty_insert_flip_string(to->port, buf, c); - spin_unlock_irqrestore(&to->port->lock, flags); - /* And shovel */ - if (c) - tty_flip_buffer_push(to->port); - } - return c; + return tty_insert_flip_string_and_push_buffer(to->port, buf, c); } /** diff --git a/drivers/tty/tty.h b/drivers/tty/tty.h index b710c5ef89ab2f..f310a8274df153 100644 --- a/drivers/tty/tty.h +++ b/drivers/tty/tty.h @@ -111,4 +111,7 @@ static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); +int tty_insert_flip_string_and_push_buffer(struct tty_port *port, + const unsigned char *chars, size_t cnt); + #endif diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 76fb600c1f7933..f3143ae4bf7fe8 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -562,6 +562,37 @@ void tty_flip_buffer_push(struct tty_port *port) } EXPORT_SYMBOL(tty_flip_buffer_push); +/** + * tty_insert_flip_string_and_push_buffer - add characters to the tty buffer and + * push + * @port: tty port + * @chars: characters + * @size: size + * + * The function combines tty_insert_flip_string() and tty_flip_buffer_push() + * with the exception of properly holding the @port->lock. + * + * To be used only internally (by pty currently). + * + * Returns: the number added. + */ +int tty_insert_flip_string_and_push_buffer(struct tty_port *port, + const unsigned char *chars, size_t size) +{ + struct tty_bufhead *buf = &port->buf; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + size = tty_insert_flip_string(port, chars, size); + if (size) + tty_flip_buffer_commit(buf->tail); + spin_unlock_irqrestore(&port->lock, flags); + + queue_work(system_unbound_wq, &buf->work); + + return size; +} + /** * tty_buffer_init - prepare a tty buffer structure * @port: tty port to initialise From b34229f4b212367196d787170b02be6f31802622 Mon Sep 17 00:00:00 2001 From: Jose Alonso Date: Mon, 13 Jun 2022 15:32:44 -0300 Subject: [PATCH 0945/1056] net: usb: ax88179_178a needs FLAG_SEND_ZLP commit 36a15e1cb134c0395261ba1940762703f778438c upstream. The extra byte inserted by usbnet.c when (length % dev->maxpacket == 0) is causing problems to device. This patch sets FLAG_SEND_ZLP to avoid this. Tested with: 0b95:1790 ASIX Electronics Corp. AX88179 Gigabit Ethernet Problems observed: ====================================================================== 1) Using ssh/sshfs. The remote sshd daemon can abort with the message: "message authentication code incorrect" This happens because the tcp message sent is corrupted during the USB "Bulk out". The device calculate the tcp checksum and send a valid tcp message to the remote sshd. Then the encryption detects the error and aborts. 2) NETDEV WATCHDOG: ... (ax88179_178a): transmit queue 0 timed out 3) Stop normal work without any log message. The "Bulk in" continue receiving packets normally. The host sends "Bulk out" and the device responds with -ECONNRESET. (The netusb.c code tx_complete ignore -ECONNRESET) Under normal conditions these errors take days to happen and in intense usage take hours. A test with ping gives packet loss, showing that something is wrong: ping -4 -s 462 {destination} # 462 = 512 - 42 - 8 Not all packets fail. My guess is that the device tries to find another packet starting at the extra byte and will fail or not depending on the next bytes (old buffer content). ====================================================================== Signed-off-by: Jose Alonso Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/ax88179_178a.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 0a2c3860179e7a..e1b9b78b474e9e 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1796,7 +1796,7 @@ static const struct driver_info ax88179_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1809,7 +1809,7 @@ static const struct driver_info ax88178a_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1822,7 +1822,7 @@ static const struct driver_info cypress_GX3_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1835,7 +1835,7 @@ static const struct driver_info dlink_dub1312_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1848,7 +1848,7 @@ static const struct driver_info sitecom_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1861,7 +1861,7 @@ static const struct driver_info samsung_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1874,7 +1874,7 @@ static const struct driver_info lenovo_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1887,7 +1887,7 @@ static const struct driver_info belkin_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1900,7 +1900,7 @@ static const struct driver_info toshiba_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1913,7 +1913,7 @@ static const struct driver_info mct_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; From d026ed6eda29b2d9c5ace9f8142cd6d1ec819c0b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 21 Jul 2022 10:30:14 -0700 Subject: [PATCH 0946/1056] watch-queue: remove spurious double semicolon commit 44e29e64cf1ac0cffb152e0532227ea6d002aa28 upstream. Sedat Dilek noticed that I had an extraneous semicolon at the end of a line in the previous patch. It's harmless, but unintentional, and while compilers just treat it as an extra empty statement, for all I know some other tooling might warn about it. So clean it up before other people notice too ;) Fixes: 353f7988dd84 ("watchqueue: make sure to serialize 'wqueue->defunct' properly") Reported-by: Sedat Dilek Signed-off-by: Linus Torvalds Reported-by: Sedat Dilek Signed-off-by: Greg Kroah-Hartman --- kernel/watch_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index 3df760b9a775fe..debebcd2664ef1 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -227,7 +227,7 @@ void __post_watch_notification(struct watch_list *wlist, if (lock_wqueue(wqueue)) { post_one_notification(wqueue, n); - unlock_wqueue(wqueue);; + unlock_wqueue(wqueue); } } From 760fe32034931a8c97c773edef2f1b7c7346f43a Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 4 Nov 2021 16:52:06 -0400 Subject: [PATCH 0947/1056] drm/amd/display: Don't lock connection_mutex for DMUB HPD commit d82b3266ef88dc10fe0e7031b2bd8ba7eedb7e59 upstream. [Why] Per DRM spec we only need to hold that lock when touching connector->state - which we do not do in that handler. Taking this locking introduces unnecessary dependencies with other threads which is bad for performance and opens up the potential for a deadlock since there are multiple locks being held at once. [How] Remove the connection_mutex lock/unlock routine and just iterate over the drm connectors normally. The iter helpers implicitly lock the connection list so this is safe to do. DC link access also does not need to be guarded since the link table is static at creation - we don't dynamically add or remove links, just streams. Fixes: e27c41d5b068 ("drm/amd/display: Support for DMUB HPD interrupt handling") Reviewed-by: Jude Shih Acked-by: Anson Jacob Tested-by: Daniel Wheeler Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3b4449bf4d85f6..2b5e8de940826e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -668,10 +668,7 @@ void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification *not return; } - drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); - link_index = notify->link_index; - link = adev->dm.dc->links[link_index]; drm_connector_list_iter_begin(dev, &iter); @@ -684,7 +681,6 @@ void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification *not } } drm_connector_list_iter_end(&iter); - drm_modeset_unlock(&dev->mode_config.connection_mutex); } From f85a6046f7718f11c6ad015a0ba25d897194b1ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Sun, 9 Jan 2022 19:42:45 +0100 Subject: [PATCH 0948/1056] drm/amd/display: invalid parameter check in dmub_hpd_callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 978ffac878fd64039f95798b15b430032d2d89d5 upstream. The function performs a check on the "adev" input parameter, however, it is used before the check. Initialize the "dev" variable after the sanity check to avoid a possible NULL pointer dereference. Fixes: e27c41d5b0681 ("drm/amd/display: Support for DMUB HPD interrupt handling") Addresses-Coverity-ID: 1493909 ("Null pointer dereference") Reviewed-by: Harry Wentland Signed-off-by: José Expósito Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2b5e8de940826e..5323d00f8f59ae 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -653,7 +653,7 @@ void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification *not struct drm_connector_list_iter iter; struct dc_link *link; uint8_t link_index = 0; - struct drm_device *dev = adev->dm.ddev; + struct drm_device *dev; if (adev == NULL) return; @@ -670,6 +670,7 @@ void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification *not link_index = notify->link_index; link = adev->dm.dc->links[link_index]; + dev = adev->dm.ddev; drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { From b6c24afba608b9faed82bd24a3adf2efac269f73 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 29 Mar 2022 13:21:45 -0700 Subject: [PATCH 0949/1056] x86/extable: Prefer local labels in .set directives commit 334865b2915c33080624e0d06f1c3e917036472c upstream. Bernardo reported an error that Nathan bisected down to (x86_64) defconfig+LTO_CLANG_FULL+X86_PMEM_LEGACY. LTO vmlinux.o ld.lld: error: :1:13: redefinition of 'found' .set found, 0 ^ :29:1: while in macro instantiation extable_type_reg reg=%eax, type=(17 | ((0) << 16)) ^ This appears to be another LTO specific issue similar to what was folded into commit 4b5305decc84 ("x86/extable: Extend extable functionality"), where the `.set found, 0` in DEFINE_EXTABLE_TYPE_REG in arch/x86/include/asm/asm.h conflicts with the symbol for the static function `found` in arch/x86/kernel/pmem.c. Assembler .set directive declare symbols with global visibility, so the assembler may not rename such symbols in the event of a conflict. LTO could rename static functions if there was a conflict in C sources, but it cannot see into symbols defined in inline asm. The symbols are also retained in the symbol table, regardless of LTO. Give the symbols .L prefixes making them locally visible, so that they may be renamed for LTO to avoid conflicts, and to drop them from the symbol table regardless of LTO. Fixes: 4b5305decc84 ("x86/extable: Extend extable functionality") Reported-by: Bernardo Meurer Costa Debugged-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/20220329202148.2379697-1-ndesaulniers@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 8cbdfd77a3909c..6dd47c9ec788a1 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -149,24 +149,24 @@ # define DEFINE_EXTABLE_TYPE_REG \ ".macro extable_type_reg type:req reg:req\n" \ - ".set found, 0\n" \ - ".set regnr, 0\n" \ + ".set .Lfound, 0\n" \ + ".set .Lregnr, 0\n" \ ".irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15\n" \ ".ifc \\reg, %%\\rs\n" \ - ".set found, found+1\n" \ - ".long \\type + (regnr << 8)\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ ".endif\n" \ - ".set regnr, regnr+1\n" \ + ".set .Lregnr, .Lregnr+1\n" \ ".endr\n" \ - ".set regnr, 0\n" \ + ".set .Lregnr, 0\n" \ ".irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d,r13d,r14d,r15d\n" \ ".ifc \\reg, %%\\rs\n" \ - ".set found, found+1\n" \ - ".long \\type + (regnr << 8)\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ ".endif\n" \ - ".set regnr, regnr+1\n" \ + ".set .Lregnr, .Lregnr+1\n" \ ".endr\n" \ - ".if (found != 1)\n" \ + ".if (.Lfound != 1)\n" \ ".error \"extable_type_reg: bad register argument\"\n" \ ".endif\n" \ ".endm\n" From 9444462d6343566155aa18a9dddb0144dd2f1a6e Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 12 May 2022 13:14:20 +0300 Subject: [PATCH 0950/1056] KVM: x86: fix typo in __try_cmpxchg_user causing non-atomicness commit 33fbe6befa622c082f7d417896832856814bdde0 upstream. This shows up as a TDP MMU leak when running nested. Non-working cmpxchg on L0 relies makes L1 install two different shadow pages under same spte, and one of them is leaked. Fixes: 1c2361f667f36 ("KVM: x86: Use __try_cmpxchg_user() to emulate atomic accesses") Signed-off-by: Maxim Levitsky Message-Id: <20220512101420.306759-1-mlevitsk@redhat.com> Reviewed-by: Sean Christopherson Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f9802ceed60ab8..bd410926fda592 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6933,7 +6933,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, goto emul_write; hva = kvm_vcpu_gfn_to_hva(vcpu, gpa_to_gfn(gpa)); - if (kvm_is_error_hva(addr)) + if (kvm_is_error_hva(hva)) goto emul_write; hva += offset_in_page(gpa); From e4481000ac689119ed042caf499bd86b69f14642 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 7 Jun 2022 17:00:53 +0200 Subject: [PATCH 0951/1056] x86: drop bogus "cc" clobber from __try_cmpxchg_user_asm() commit 1df931d95f4dc1c11db1123e85d4e08156e46ef9 upstream. As noted (and fixed) a couple of times in the past, "=@cc" outputs and clobbering of "cc" don't work well together. The compiler appears to mean to reject such, but doesn't - in its upstream form - quite manage to yet for "cc". Furthermore two similar macros don't clobber "cc", and clobbering "cc" is pointless in asm()-s for x86 anyway - the compiler always assumes status flags to be clobbered there. Fixes: 989b5db215a2 ("x86/uaccess: Implement macros for CMPXCHG on user addresses") Signed-off-by: Jan Beulich Message-Id: <485c0c0b-a3a7-0b7c-5264-7d00c01de032@suse.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 2f4c9c168b11d8..ab5e5773730935 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -471,7 +471,7 @@ do { \ [ptr] "+m" (*_ptr), \ [old] "+a" (__old) \ : [new] ltype (__new) \ - : "memory", "cc"); \ + : "memory"); \ if (unlikely(__err)) \ goto label; \ if (unlikely(!success)) \ From ec9ec3bc08b18c5b1b2feafd306ea7c348013898 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 16 May 2022 10:05:48 +0300 Subject: [PATCH 0952/1056] drm/amdgpu: Off by one in dm_dmub_outbox1_low_irq() commit a35faec3db0e13aac8ea720bc1a3503081dd5a3d upstream. The > ARRAY_SIZE() should be >= ARRAY_SIZE() to prevent an out of bounds access. Fixes: e27c41d5b068 ("drm/amd/display: Support for DMUB HPD interrupt handling") Reviewed-by: Harry Wentland Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5323d00f8f59ae..adf0517932cc77 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -757,7 +757,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) if (irq_params->irq_src == DC_IRQ_SOURCE_DMCUB_OUTBOX) { do { dc_stat_get_dmub_notification(adev->dm.dc, ¬ify); - if (notify.type > ARRAY_SIZE(dm->dmub_thread_offload)) { + if (notify.type >= ARRAY_SIZE(dm->dmub_thread_offload)) { DRM_ERROR("DM: notify type %d larger than the array size %ld !", notify.type, ARRAY_SIZE(dm->dmub_thread_offload)); continue; From 198a6f40822eda39f713e345cebdb8523645d3ce Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 11 Jan 2022 12:11:14 +0100 Subject: [PATCH 0953/1056] x86/entry_32: Fix segment exceptions commit 9cdbeec4096804083944d05da96bbaf59a1eb4f9 upstream. The LKP robot reported that commit in Fixes: caused a failure. Turns out the ldt_gdt_32 selftest turns into an infinite loop trying to clear the segment. As discovered by Sean, what happens is that PARANOID_EXIT_TO_KERNEL_MODE in the handle_exception_return path overwrites the entry stack data with the task stack data, restoring the "bad" segment value. Instead of having the exception retry the instruction, have it emulate the full instruction. Replace EX_TYPE_POP_ZERO with EX_TYPE_POP_REG which will do the equivalent of: POP %reg; MOV $imm, %reg. In order to encode the segment registers, add them as registers 8-11 for 32-bit. By setting regs->[defg]s the (nested) RESTORE_REGS will pop this value at the end of the exception handler and by increasing regs->sp, it will have skipped the stack slot. This was debugged by Sean Christopherson . [ bp: Add EX_REG_GS too. ] Fixes: aa93e2ad7464 ("x86/entry_32: Remove .fixup usage") Reported-by: kernel test robot Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/Yd1l0gInc4zRcnt/@hirez.programming.kicks-ass.net Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 13 +++++++++---- arch/x86/include/asm/extable_fixup_types.h | 11 ++++++++++- arch/x86/lib/insn-eval.c | 5 +++++ arch/x86/mm/extable.c | 17 +++-------------- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2cba70f9753bc0..6b44263d7efbc5 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -268,11 +268,16 @@ 1: popl %ds 2: popl %es 3: popl %fs - addl $(4 + \pop), %esp /* pop the unused "gs" slot */ +4: addl $(4 + \pop), %esp /* pop the unused "gs" slot */ IRET_FRAME - _ASM_EXTABLE_TYPE(1b, 1b, EX_TYPE_POP_ZERO) - _ASM_EXTABLE_TYPE(2b, 2b, EX_TYPE_POP_ZERO) - _ASM_EXTABLE_TYPE(3b, 3b, EX_TYPE_POP_ZERO) + + /* + * There is no _ASM_EXTABLE_TYPE_REG() for ASM, however since this is + * ASM the registers are known and we can trivially hard-code them. + */ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_POP_ZERO|EX_REG_DS) + _ASM_EXTABLE_TYPE(2b, 3b, EX_TYPE_POP_ZERO|EX_REG_ES) + _ASM_EXTABLE_TYPE(3b, 4b, EX_TYPE_POP_ZERO|EX_REG_FS) .endm .macro RESTORE_ALL_NMI cr3_reg:req pop=0 diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 7469038de100dc..b3b785b9bb14c4 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -16,9 +16,16 @@ #define EX_DATA_FLAG_SHIFT 12 #define EX_DATA_IMM_SHIFT 16 +#define EX_DATA_REG(reg) ((reg) << EX_DATA_REG_SHIFT) #define EX_DATA_FLAG(flag) ((flag) << EX_DATA_FLAG_SHIFT) #define EX_DATA_IMM(imm) ((imm) << EX_DATA_IMM_SHIFT) +/* segment regs */ +#define EX_REG_DS EX_DATA_REG(8) +#define EX_REG_ES EX_DATA_REG(9) +#define EX_REG_FS EX_DATA_REG(10) +#define EX_REG_GS EX_DATA_REG(11) + /* flags */ #define EX_FLAG_CLEAR_AX EX_DATA_FLAG(1) #define EX_FLAG_CLEAR_DX EX_DATA_FLAG(2) @@ -41,7 +48,9 @@ #define EX_TYPE_RDMSR_IN_MCE 13 #define EX_TYPE_DEFAULT_MCE_SAFE 14 #define EX_TYPE_FAULT_MCE_SAFE 15 -#define EX_TYPE_POP_ZERO 16 + +#define EX_TYPE_POP_REG 16 /* sp += sizeof(long) */ +#define EX_TYPE_POP_ZERO (EX_TYPE_POP_REG | EX_DATA_IMM(0)) #define EX_TYPE_IMM_REG 17 /* reg := (long)imm */ #define EX_TYPE_EFAULT_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(-EFAULT)) diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 7760d228041bac..c8a962c2e653d1 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -430,6 +430,11 @@ static const int pt_regoff[] = { offsetof(struct pt_regs, r13), offsetof(struct pt_regs, r14), offsetof(struct pt_regs, r15), +#else + offsetof(struct pt_regs, ds), + offsetof(struct pt_regs, es), + offsetof(struct pt_regs, fs), + offsetof(struct pt_regs, gs), #endif }; diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 1c558f8e8c079b..13d838e6030bc1 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -118,18 +118,6 @@ static bool ex_handler_clear_fs(const struct exception_table_entry *fixup, return ex_handler_default(fixup, regs); } -static bool ex_handler_pop_zero(const struct exception_table_entry *fixup, - struct pt_regs *regs) -{ - /* - * Typically used for when "pop %seg" traps, in which case we'll clear - * the stack slot and re-try the instruction, which will then succeed - * to pop zero. - */ - *((unsigned long *)regs->sp) = 0; - return ex_handler_default(fixup, regs); -} - static bool ex_handler_imm_reg(const struct exception_table_entry *fixup, struct pt_regs *regs, int reg, int imm) { @@ -203,8 +191,9 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, case EX_TYPE_RDMSR_IN_MCE: ex_handler_msr_mce(regs, false); break; - case EX_TYPE_POP_ZERO: - return ex_handler_pop_zero(e, regs); + case EX_TYPE_POP_REG: + regs->sp += sizeof(long); + fallthrough; case EX_TYPE_IMM_REG: return ex_handler_imm_reg(e, regs, reg, imm); } From c6e4817ab622f526d6afe9e02c6d85b177b3e846 Mon Sep 17 00:00:00 2001 From: Hayden Goodfellow Date: Sun, 12 Sep 2021 21:32:09 -0400 Subject: [PATCH 0954/1056] drm/amd/display: Fix wrong format specifier in amdgpu_dm.c commit 655c167edc8c260b6df08bdcfaca8afde0efbeb6 upstream. [Why] Currently, the 32bit kernel build fails due to an incorrect string format specifier. ARRAY_SIZE() returns size_t type as it uses sizeof(). However, we specify it in a string as %ld. This causes a compiler error and causes the 32bit build to fail. [How] Change the %ld to %zu as size_t (which sizeof() returns) is an unsigned integer data type. We use 'z' to ensure it also works with 64bit build. Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Hayden Goodfellow Signed-off-by: Alex Deucher Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index adf0517932cc77..e3dfea3d44a456 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -758,7 +758,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) do { dc_stat_get_dmub_notification(adev->dm.dc, ¬ify); if (notify.type >= ARRAY_SIZE(dm->dmub_thread_offload)) { - DRM_ERROR("DM: notify type %d larger than the array size %ld !", notify.type, + DRM_ERROR("DM: notify type %d larger than the array size %zu!", notify.type, ARRAY_SIZE(dm->dmub_thread_offload)); continue; } From 7d8048d4e064d4ef7719e9520f6c123c051fca99 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 29 Jul 2022 17:25:34 +0200 Subject: [PATCH 0955/1056] Linux 5.15.58 Link: https://lore.kernel.org/r/20220727161026.977588183@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Linux Kernel Functional Testing Tested-by: Bagas Sanjaya Tested-by: Jon Hunter Tested-by: Shuah Khan Link: https://lore.kernel.org/r/20220728133327.660846209@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Guenter Roeck Tested-by: Ron Economos Tested-by: Bagas Sanjaya Tested-by: Linux Kernel Functional Testing Tested-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 69bfff4d9c2d6b..d7ba0de250cbde 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 57 +SUBLEVEL = 58 EXTRAVERSION = NAME = Trick or Treat From f32d5615a78a1256c4f557ccc6543866e75d03f4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 21 Jul 2022 09:10:50 -0700 Subject: [PATCH 0956/1056] Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put commit d0be8347c623e0ac4202a1d4e0373882821f56b0 upstream. This fixes the following trace which is caused by hci_rx_work starting up *after* the final channel reference has been put() during sock_close() but *before* the references to the channel have been destroyed, so instead the code now rely on kref_get_unless_zero/l2cap_chan_hold_unless_zero to prevent referencing a channel that is about to be destroyed. refcount_t: increment on 0; use-after-free. BUG: KASAN: use-after-free in refcount_dec_and_test+0x20/0xd0 Read of size 4 at addr ffffffc114f5bf18 by task kworker/u17:14/705 CPU: 4 PID: 705 Comm: kworker/u17:14 Tainted: G S W 4.14.234-00003-g1fb6d0bd49a4-dirty #28 Hardware name: Qualcomm Technologies, Inc. SM8150 V2 PM8150 Google Inc. MSM sm8150 Flame DVT (DT) Workqueue: hci0 hci_rx_work Call trace: dump_backtrace+0x0/0x378 show_stack+0x20/0x2c dump_stack+0x124/0x148 print_address_description+0x80/0x2e8 __kasan_report+0x168/0x188 kasan_report+0x10/0x18 __asan_load4+0x84/0x8c refcount_dec_and_test+0x20/0xd0 l2cap_chan_put+0x48/0x12c l2cap_recv_frame+0x4770/0x6550 l2cap_recv_acldata+0x44c/0x7a4 hci_acldata_packet+0x100/0x188 hci_rx_work+0x178/0x23c process_one_work+0x35c/0x95c worker_thread+0x4cc/0x960 kthread+0x1a8/0x1c4 ret_from_fork+0x10/0x18 Cc: stable@kernel.org Reported-by: Lee Jones Signed-off-by: Luiz Augusto von Dentz Tested-by: Lee Jones Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/l2cap_core.c | 61 +++++++++++++++++++++++++++-------- 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 3c4f550e5a8b75..2f766e3437ce2e 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -847,6 +847,7 @@ enum { }; void l2cap_chan_hold(struct l2cap_chan *c); +struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c); void l2cap_chan_put(struct l2cap_chan *c); static inline void l2cap_chan_lock(struct l2cap_chan *chan) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c57a45df7a267f..780073f6affcb8 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -111,7 +111,8 @@ static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn, } /* Find channel with given SCID. - * Returns locked channel. */ + * Returns a reference locked channel. + */ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, u16 cid) { @@ -119,15 +120,19 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_scid(conn, cid); - if (c) - l2cap_chan_lock(c); + if (c) { + /* Only lock if chan reference is not 0 */ + c = l2cap_chan_hold_unless_zero(c); + if (c) + l2cap_chan_lock(c); + } mutex_unlock(&conn->chan_lock); return c; } /* Find channel with given DCID. - * Returns locked channel. + * Returns a reference locked channel. */ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, u16 cid) @@ -136,8 +141,12 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_dcid(conn, cid); - if (c) - l2cap_chan_lock(c); + if (c) { + /* Only lock if chan reference is not 0 */ + c = l2cap_chan_hold_unless_zero(c); + if (c) + l2cap_chan_lock(c); + } mutex_unlock(&conn->chan_lock); return c; @@ -162,8 +171,12 @@ static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn, mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_ident(conn, ident); - if (c) - l2cap_chan_lock(c); + if (c) { + /* Only lock if chan reference is not 0 */ + c = l2cap_chan_hold_unless_zero(c); + if (c) + l2cap_chan_lock(c); + } mutex_unlock(&conn->chan_lock); return c; @@ -497,6 +510,16 @@ void l2cap_chan_hold(struct l2cap_chan *c) kref_get(&c->kref); } +struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c) +{ + BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); + + if (!kref_get_unless_zero(&c->kref)) + return NULL; + + return c; +} + void l2cap_chan_put(struct l2cap_chan *c) { BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); @@ -1969,7 +1992,10 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, src_match = !bacmp(&c->src, src); dst_match = !bacmp(&c->dst, dst); if (src_match && dst_match) { - l2cap_chan_hold(c); + c = l2cap_chan_hold_unless_zero(c); + if (!c) + continue; + read_unlock(&chan_list_lock); return c; } @@ -1984,7 +2010,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, } if (c1) - l2cap_chan_hold(c1); + c1 = l2cap_chan_hold_unless_zero(c1); read_unlock(&chan_list_lock); @@ -4464,6 +4490,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, unlock: l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return err; } @@ -4578,6 +4605,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, done: l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return err; } @@ -5305,6 +5333,7 @@ static inline int l2cap_move_channel_req(struct l2cap_conn *conn, l2cap_send_move_chan_rsp(chan, result); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -5397,6 +5426,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result) } l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid, @@ -5426,6 +5456,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid, l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } static int l2cap_move_channel_rsp(struct l2cap_conn *conn, @@ -5489,6 +5520,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn, l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -5524,6 +5556,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn, } l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -5896,12 +5929,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, if (credits > max_credits) { BT_ERR("LE credits overflow"); l2cap_send_disconn_req(chan, ECONNRESET); - l2cap_chan_unlock(chan); /* Return 0 so that we don't trigger an unnecessary * command reject packet. */ - return 0; + goto unlock; } chan->tx_credits += credits; @@ -5912,7 +5944,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, if (chan->tx_credits) chan->ops->resume(chan); +unlock: l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -7598,6 +7632,7 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, done: l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, @@ -8086,7 +8121,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, if (src_type != c->src_type) continue; - l2cap_chan_hold(c); + c = l2cap_chan_hold_unless_zero(c); read_unlock(&chan_list_lock); return c; } From 46f6301fb4f1e8de0c139f7146677f73ef34c1a1 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Fri, 3 Jun 2022 15:28:01 -0700 Subject: [PATCH 0957/1056] Revert "ocfs2: mount shared volume without ha stack" commit c80af0c250c8f8a3c978aa5aafbe9c39b336b813 upstream. This reverts commit 912f655d78c5d4ad05eac287f23a435924df7144. This commit introduced a regression that can cause mount hung. The changes in __ocfs2_find_empty_slot causes that any node with none-zero node number can grab the slot that was already taken by node 0, so node 1 will access the same journal with node 0, when it try to grab journal cluster lock, it will hung because it was already acquired by node 0. It's very easy to reproduce this, in one cluster, mount node 0 first, then node 1, you will see the following call trace from node 1. [13148.735424] INFO: task mount.ocfs2:53045 blocked for more than 122 seconds. [13148.739691] Not tainted 5.15.0-2148.0.4.el8uek.mountracev2.x86_64 #2 [13148.742560] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [13148.745846] task:mount.ocfs2 state:D stack: 0 pid:53045 ppid: 53044 flags:0x00004000 [13148.749354] Call Trace: [13148.750718] [13148.752019] ? usleep_range+0x90/0x89 [13148.753882] __schedule+0x210/0x567 [13148.755684] schedule+0x44/0xa8 [13148.757270] schedule_timeout+0x106/0x13c [13148.759273] ? __prepare_to_swait+0x53/0x78 [13148.761218] __wait_for_common+0xae/0x163 [13148.763144] __ocfs2_cluster_lock.constprop.0+0x1d6/0x870 [ocfs2] [13148.765780] ? ocfs2_inode_lock_full_nested+0x18d/0x398 [ocfs2] [13148.768312] ocfs2_inode_lock_full_nested+0x18d/0x398 [ocfs2] [13148.770968] ocfs2_journal_init+0x91/0x340 [ocfs2] [13148.773202] ocfs2_check_volume+0x39/0x461 [ocfs2] [13148.775401] ? iput+0x69/0xba [13148.777047] ocfs2_mount_volume.isra.0.cold+0x40/0x1f5 [ocfs2] [13148.779646] ocfs2_fill_super+0x54b/0x853 [ocfs2] [13148.781756] mount_bdev+0x190/0x1b7 [13148.783443] ? ocfs2_remount+0x440/0x440 [ocfs2] [13148.785634] legacy_get_tree+0x27/0x48 [13148.787466] vfs_get_tree+0x25/0xd0 [13148.789270] do_new_mount+0x18c/0x2d9 [13148.791046] __x64_sys_mount+0x10e/0x142 [13148.792911] do_syscall_64+0x3b/0x89 [13148.794667] entry_SYSCALL_64_after_hwframe+0x170/0x0 [13148.797051] RIP: 0033:0x7f2309f6e26e [13148.798784] RSP: 002b:00007ffdcee7d408 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [13148.801974] RAX: ffffffffffffffda RBX: 00007ffdcee7d4a0 RCX: 00007f2309f6e26e [13148.804815] RDX: 0000559aa762a8ae RSI: 0000559aa939d340 RDI: 0000559aa93a22b0 [13148.807719] RBP: 00007ffdcee7d5b0 R08: 0000559aa93a2290 R09: 00007f230a0b4820 [13148.810659] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffdcee7d420 [13148.813609] R13: 0000000000000000 R14: 0000559aa939f000 R15: 0000000000000000 [13148.816564] To fix it, we can just fix __ocfs2_find_empty_slot. But original commit introduced the feature to mount ocfs2 locally even it is cluster based, that is a very dangerous, it can easily cause serious data corruption, there is no way to stop other nodes mounting the fs and corrupting it. Setup ha or other cluster-aware stack is just the cost that we have to take for avoiding corruption, otherwise we have to do it in kernel. Link: https://lkml.kernel.org/r/20220603222801.42488-1-junxiao.bi@oracle.com Fixes: 912f655d78c5("ocfs2: mount shared volume without ha stack") Signed-off-by: Junxiao Bi Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/ocfs2.h | 4 +--- fs/ocfs2/slot_map.c | 46 +++++++++++++++++++-------------------------- fs/ocfs2/super.c | 21 --------------------- 3 files changed, 20 insertions(+), 51 deletions(-) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index bb62cc2e0211b3..cf21aecdf54760 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -277,7 +277,6 @@ enum ocfs2_mount_options OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */ OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */ OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */ - OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */ }; #define OCFS2_OSB_SOFT_RO 0x0001 @@ -673,8 +672,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) static inline int ocfs2_mount_local(struct ocfs2_super *osb) { - return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT) - || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER)); + return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); } static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index 0b0ae3ebb0cf5e..da7718cef735e5 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -252,16 +252,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, int i, ret = -ENOSPC; if ((preferred >= 0) && (preferred < si->si_num_slots)) { - if (!si->si_slots[preferred].sl_valid || - !si->si_slots[preferred].sl_node_num) { + if (!si->si_slots[preferred].sl_valid) { ret = preferred; goto out; } } for(i = 0; i < si->si_num_slots; i++) { - if (!si->si_slots[i].sl_valid || - !si->si_slots[i].sl_node_num) { + if (!si->si_slots[i].sl_valid) { ret = i; break; } @@ -456,30 +454,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb) spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); - if (ocfs2_mount_local(osb)) - /* use slot 0 directly in local mode */ - slot = 0; - else { - /* search for ourselves first and take the slot if it already - * exists. Perhaps we need to mark this in a variable for our - * own journal recovery? Possibly not, though we certainly - * need to warn to the user */ - slot = __ocfs2_node_num_to_slot(si, osb->node_num); + /* search for ourselves first and take the slot if it already + * exists. Perhaps we need to mark this in a variable for our + * own journal recovery? Possibly not, though we certainly + * need to warn to the user */ + slot = __ocfs2_node_num_to_slot(si, osb->node_num); + if (slot < 0) { + /* if no slot yet, then just take 1st available + * one. */ + slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); if (slot < 0) { - /* if no slot yet, then just take 1st available - * one. */ - slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); - if (slot < 0) { - spin_unlock(&osb->osb_lock); - mlog(ML_ERROR, "no free slots available!\n"); - status = -EINVAL; - goto bail; - } - } else - printk(KERN_INFO "ocfs2: Slot %d on device (%s) was " - "already allocated to this node!\n", - slot, osb->dev_str); - } + spin_unlock(&osb->osb_lock); + mlog(ML_ERROR, "no free slots available!\n"); + status = -EINVAL; + goto bail; + } + } else + printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already " + "allocated to this node!\n", slot, osb->dev_str); ocfs2_set_slot(si, slot, osb->node_num); osb->slot_num = slot; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 7ba3dabe16f099..a03f0cabff0bf6 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -173,7 +173,6 @@ enum { Opt_dir_resv_level, Opt_journal_async_commit, Opt_err_cont, - Opt_nocluster, Opt_err, }; @@ -207,7 +206,6 @@ static const match_table_t tokens = { {Opt_dir_resv_level, "dir_resv_level=%u"}, {Opt_journal_async_commit, "journal_async_commit"}, {Opt_err_cont, "errors=continue"}, - {Opt_nocluster, "nocluster"}, {Opt_err, NULL} }; @@ -619,13 +617,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) goto out; } - tmp = OCFS2_MOUNT_NOCLUSTER; - if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { - ret = -EINVAL; - mlog(ML_ERROR, "Cannot change nocluster option on remount\n"); - goto out; - } - tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | OCFS2_MOUNT_HB_NONE; if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { @@ -866,7 +857,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, } if (ocfs2_userspace_stack(osb) && - !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && strncmp(osb->osb_cluster_stack, mopt->cluster_stack, OCFS2_STACK_LABEL_LEN)) { mlog(ML_ERROR, @@ -1145,11 +1135,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : "ordered"); - if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && - !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)) - printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted " - "without cluster aware mode.\n", osb->dev_str); - atomic_set(&osb->vol_state, VOLUME_MOUNTED); wake_up(&osb->osb_mount_event); @@ -1456,9 +1441,6 @@ static int ocfs2_parse_options(struct super_block *sb, case Opt_journal_async_commit: mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; break; - case Opt_nocluster: - mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER; - break; default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " @@ -1570,9 +1552,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root) if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) seq_printf(s, ",journal_async_commit"); - if (opts & OCFS2_MOUNT_NOCLUSTER) - seq_printf(s, ",nocluster"); - return 0; } From 518df26b5238188d490f8c09064acd3be48543e2 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 7 Jul 2022 18:53:29 +0800 Subject: [PATCH 0958/1056] ntfs: fix use-after-free in ntfs_ucsncmp() commit 38c9c22a85aeed28d0831f230136e9cf6fa2ed44 upstream. Syzkaller reported use-after-free bug as follows: ================================================================== BUG: KASAN: use-after-free in ntfs_ucsncmp+0x123/0x130 Read of size 2 at addr ffff8880751acee8 by task a.out/879 CPU: 7 PID: 879 Comm: a.out Not tainted 5.19.0-rc4-next-20220630-00001-gcc5218c8bd2c-dirty #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x1c0/0x2b0 print_address_description.constprop.0.cold+0xd4/0x484 print_report.cold+0x55/0x232 kasan_report+0xbf/0xf0 ntfs_ucsncmp+0x123/0x130 ntfs_are_names_equal.cold+0x2b/0x41 ntfs_attr_find+0x43b/0xb90 ntfs_attr_lookup+0x16d/0x1e0 ntfs_read_locked_attr_inode+0x4aa/0x2360 ntfs_attr_iget+0x1af/0x220 ntfs_read_locked_inode+0x246c/0x5120 ntfs_iget+0x132/0x180 load_system_files+0x1cc6/0x3480 ntfs_fill_super+0xa66/0x1cf0 mount_bdev+0x38d/0x460 legacy_get_tree+0x10d/0x220 vfs_get_tree+0x93/0x300 do_new_mount+0x2da/0x6d0 path_mount+0x496/0x19d0 __x64_sys_mount+0x284/0x300 do_syscall_64+0x3b/0xc0 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f3f2118d9ea Code: 48 8b 0d a9 f4 0b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 76 f4 0b 00 f7 d8 64 89 01 48 RSP: 002b:00007ffc269deac8 EFLAGS: 00000202 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3f2118d9ea RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007ffc269dec00 RBP: 00007ffc269dec80 R08: 00007ffc269deb00 R09: 00007ffc269dec44 R10: 0000000000000000 R11: 0000000000000202 R12: 000055f81ab1d220 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 The buggy address belongs to the physical page: page:0000000085430378 refcount:1 mapcount:1 mapping:0000000000000000 index:0x555c6a81d pfn:0x751ac memcg:ffff888101f7e180 anon flags: 0xfffffc00a0014(uptodate|lru|mappedtodisk|swapbacked|node=0|zone=1|lastcpupid=0x1fffff) raw: 000fffffc00a0014 ffffea0001bf2988 ffffea0001de2448 ffff88801712e201 raw: 0000000555c6a81d 0000000000000000 0000000100000000 ffff888101f7e180 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880751acd80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff8880751ace00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff8880751ace80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ^ ffff8880751acf00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff8880751acf80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== The reason is that struct ATTR_RECORD->name_offset is 6485, end address of name string is out of bounds. Fix this by adding sanity check on end address of attribute name string. [akpm@linux-foundation.org: coding-style cleanups] [chenxiaosong2@huawei.com: cleanup suggested by Hawkins Jiawei] Link: https://lkml.kernel.org/r/20220709064511.3304299-1-chenxiaosong2@huawei.com Link: https://lkml.kernel.org/r/20220707105329.4020708-1-chenxiaosong2@huawei.com Signed-off-by: ChenXiaoSong Signed-off-by: Hawkins Jiawei Cc: Anton Altaparmakov Cc: ChenXiaoSong Cc: Yongqiang Liu Cc: Zhang Yi Cc: Zhang Xiaoxu Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ntfs/attrib.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index d563abc3e13643..914e9917313000 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -592,8 +592,12 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name, a = (ATTR_RECORD*)((u8*)ctx->attr + le32_to_cpu(ctx->attr->length)); for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) { - if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec + - le32_to_cpu(ctx->mrec->bytes_allocated)) + u8 *mrec_end = (u8 *)ctx->mrec + + le32_to_cpu(ctx->mrec->bytes_allocated); + u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) + + a->name_length * sizeof(ntfschar); + if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end || + name_end > mrec_end) break; ctx->attr = a; if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) || From 3ef8040afce74c05fa7c79a316b7b0eb28f6e7ef Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 16 Jul 2022 21:37:10 -0700 Subject: [PATCH 0959/1056] fs: sendfile handles O_NONBLOCK of out_fd commit bdeb77bc2c405fa9f954c20269db175a0bd2793f upstream. sendfile has to return EAGAIN if out_fd is nonblocking and the write into it would block. Here is a small reproducer for the problem: #define _GNU_SOURCE /* See feature_test_macros(7) */ #include #include #include #include #include #include #include #define FILE_SIZE (1UL << 30) int main(int argc, char **argv) { int p[2], fd; if (pipe2(p, O_NONBLOCK)) return 1; fd = open(argv[1], O_RDWR | O_TMPFILE, 0666); if (fd < 0) return 1; ftruncate(fd, FILE_SIZE); if (sendfile(p[1], fd, 0, FILE_SIZE) == -1) { fprintf(stderr, "FAIL\n"); } if (sendfile(p[1], fd, 0, FILE_SIZE) != -1 || errno != EAGAIN) { fprintf(stderr, "FAIL\n"); } return 0; } It worked before b964bf53e540, it is stuck after b964bf53e540, and it works again with this fix. This regression occurred because do_splice_direct() calls pipe_write that handles O_NONBLOCK. Here is a trace log from the reproducer: 1) | __x64_sys_sendfile64() { 1) | do_sendfile() { 1) | __fdget() 1) | rw_verify_area() 1) | __fdget() 1) | rw_verify_area() 1) | do_splice_direct() { 1) | rw_verify_area() 1) | splice_direct_to_actor() { 1) | do_splice_to() { 1) | rw_verify_area() 1) | generic_file_splice_read() 1) + 74.153 us | } 1) | direct_splice_actor() { 1) | iter_file_splice_write() { 1) | __kmalloc() 1) 0.148 us | pipe_lock(); 1) 0.153 us | splice_from_pipe_next.part.0(); 1) 0.162 us | page_cache_pipe_buf_confirm(); ... 16 times 1) 0.159 us | page_cache_pipe_buf_confirm(); 1) | vfs_iter_write() { 1) | do_iter_write() { 1) | rw_verify_area() 1) | do_iter_readv_writev() { 1) | pipe_write() { 1) | mutex_lock() 1) 0.153 us | mutex_unlock(); 1) 1.368 us | } 1) 1.686 us | } 1) 5.798 us | } 1) 6.084 us | } 1) 0.174 us | kfree(); 1) 0.152 us | pipe_unlock(); 1) + 14.461 us | } 1) + 14.783 us | } 1) 0.164 us | page_cache_pipe_buf_release(); ... 16 times 1) 0.161 us | page_cache_pipe_buf_release(); 1) | touch_atime() 1) + 95.854 us | } 1) + 99.784 us | } 1) ! 107.393 us | } 1) ! 107.699 us | } Link: https://lkml.kernel.org/r/20220415005015.525191-1-avagin@gmail.com Fixes: b964bf53e540 ("teach sendfile(2) to handle send-to-pipe directly") Signed-off-by: Andrei Vagin Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/read_write.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/read_write.c b/fs/read_write.c index c6db1a0762fab8..8d3ec975514d0f 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1250,6 +1250,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, count, fl); file_end_write(out.file); } else { + if (out.file->f_flags & O_NONBLOCK) + fl |= SPLICE_F_NONBLOCK; + retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl); } From 70d0ce332d2691abd34507eba920baf2ee9b5aea Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 7 Jul 2022 19:56:50 +0300 Subject: [PATCH 0960/1056] secretmem: fix unhandled fault in truncate commit 84ac013046ccc438af04b7acecd4d3ab84fe4bde upstream. syzkaller reports the following issue: BUG: unable to handle page fault for address: ffff888021f7e005 PGD 11401067 P4D 11401067 PUD 11402067 PMD 21f7d063 PTE 800fffffde081060 Oops: 0002 [#1] PREEMPT SMP KASAN CPU: 0 PID: 3761 Comm: syz-executor281 Not tainted 5.19.0-rc4-syzkaller-00014-g941e3e791269 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:memset_erms+0x9/0x10 arch/x86/lib/memset_64.S:64 Code: c1 e9 03 40 0f b6 f6 48 b8 01 01 01 01 01 01 01 01 48 0f af c6 f3 48 ab 89 d1 f3 aa 4c 89 c8 c3 90 49 89 f9 40 88 f0 48 89 d1 aa 4c 89 c8 c3 90 49 89 fa 40 0f b6 ce 48 b8 01 01 01 01 01 01 RSP: 0018:ffffc9000329fa90 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 0000000000001000 RCX: 0000000000000ffb RDX: 0000000000000ffb RSI: 0000000000000000 RDI: ffff888021f7e005 RBP: ffffea000087df80 R08: 0000000000000001 R09: ffff888021f7e005 R10: ffffed10043efdff R11: 0000000000000000 R12: 0000000000000005 R13: 0000000000000000 R14: 0000000000001000 R15: 0000000000000ffb FS: 00007fb29d8b2700(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff888021f7e005 CR3: 0000000026e7b000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: zero_user_segments include/linux/highmem.h:272 [inline] folio_zero_range include/linux/highmem.h:428 [inline] truncate_inode_partial_folio+0x76a/0xdf0 mm/truncate.c:237 truncate_inode_pages_range+0x83b/0x1530 mm/truncate.c:381 truncate_inode_pages mm/truncate.c:452 [inline] truncate_pagecache+0x63/0x90 mm/truncate.c:753 simple_setattr+0xed/0x110 fs/libfs.c:535 secretmem_setattr+0xae/0xf0 mm/secretmem.c:170 notify_change+0xb8c/0x12b0 fs/attr.c:424 do_truncate+0x13c/0x200 fs/open.c:65 do_sys_ftruncate+0x536/0x730 fs/open.c:193 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fb29d900899 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 11 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fb29d8b2318 EFLAGS: 00000246 ORIG_RAX: 000000000000004d RAX: ffffffffffffffda RBX: 00007fb29d988408 RCX: 00007fb29d900899 RDX: 00007fb29d900899 RSI: 0000000000000005 RDI: 0000000000000003 RBP: 00007fb29d988400 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fb29d98840c R13: 00007ffca01a23bf R14: 00007fb29d8b2400 R15: 0000000000022000 Modules linked in: CR2: ffff888021f7e005 ---[ end trace 0000000000000000 ]--- Eric Biggers suggested that this happens when secretmem_setattr()->simple_setattr() races with secretmem_fault() so that a page that is faulted in by secretmem_fault() (and thus removed from the direct map) is zeroed by inode truncation right afterwards. Use mapping->invalidate_lock to make secretmem_fault() and secretmem_setattr() mutually exclusive. [rppt@linux.ibm.com: v3] Link: https://lkml.kernel.org/r/20220714091337.412297-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20220707165650.248088-1-rppt@kernel.org Reported-by: syzbot+9bd2b7adbd34b30b87e4@syzkaller.appspotmail.com Signed-off-by: Mike Rapoport Suggested-by: Eric Biggers Reviewed-by: Axel Rasmussen Reviewed-by: Jan Kara Cc: Eric Biggers Cc: Hillf Danton Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/secretmem.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/mm/secretmem.c b/mm/secretmem.c index 5a62ef3bcfcff4..14f49c0aa66e20 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -55,22 +55,28 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf) gfp_t gfp = vmf->gfp_mask; unsigned long addr; struct page *page; + vm_fault_t ret; int err; if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) return vmf_error(-EINVAL); + filemap_invalidate_lock_shared(mapping); + retry: page = find_lock_page(mapping, offset); if (!page) { page = alloc_page(gfp | __GFP_ZERO); - if (!page) - return VM_FAULT_OOM; + if (!page) { + ret = VM_FAULT_OOM; + goto out; + } err = set_direct_map_invalid_noflush(page); if (err) { put_page(page); - return vmf_error(err); + ret = vmf_error(err); + goto out; } __SetPageUptodate(page); @@ -86,7 +92,8 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf) if (err == -EEXIST) goto retry; - return vmf_error(err); + ret = vmf_error(err); + goto out; } addr = (unsigned long)page_address(page); @@ -94,7 +101,11 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf) } vmf->page = page; - return VM_FAULT_LOCKED; + ret = VM_FAULT_LOCKED; + +out: + filemap_invalidate_unlock_shared(mapping); + return ret; } static const struct vm_operations_struct secretmem_vm_ops = { @@ -162,12 +173,20 @@ static int secretmem_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); + struct address_space *mapping = inode->i_mapping; unsigned int ia_valid = iattr->ia_valid; + int ret; + + filemap_invalidate_lock(mapping); if ((ia_valid & ATTR_SIZE) && inode->i_size) - return -EINVAL; + ret = -EINVAL; + else + ret = simple_setattr(mnt_userns, dentry, iattr); - return simple_setattr(mnt_userns, dentry, iattr); + filemap_invalidate_unlock(mapping); + + return ret; } static const struct inode_operations secretmem_iops = { From 2722fb0f70286d0f31e8abad8220b453e0030a3d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Jul 2022 16:00:36 -0400 Subject: [PATCH 0961/1056] mm: fix page leak with multiple threads mapping the same page commit 3fe2895cfecd03ac74977f32102b966b6589f481 upstream. We have an application with a lot of threads that use a shared mmap backed by tmpfs mounted with -o huge=within_size. This application started leaking loads of huge pages when we upgraded to a recent kernel. Using the page ref tracepoints and a BPF program written by Tejun Heo we were able to determine that these pages would have multiple refcounts from the page fault path, but when it came to unmap time we wouldn't drop the number of refs we had added from the faults. I wrote a reproducer that mmap'ed a file backed by tmpfs with -o huge=always, and then spawned 20 threads all looping faulting random offsets in this map, while using madvise(MADV_DONTNEED) randomly for huge page aligned ranges. This very quickly reproduced the problem. The problem here is that we check for the case that we have multiple threads faulting in a range that was previously unmapped. One thread maps the PMD, the other thread loses the race and then returns 0. However at this point we already have the page, and we are no longer putting this page into the processes address space, and so we leak the page. We actually did the correct thing prior to f9ce0be71d1f, however it looks like Kirill copied what we do in the anonymous page case. In the anonymous page case we don't yet have a page, so we don't have to drop a reference on anything. Previously we did the correct thing for file based faults by returning VM_FAULT_NOPAGE so we correctly drop the reference on the page we faulted in. Fix this by returning VM_FAULT_NOPAGE in the pmd_devmap_trans_unstable() case, this makes us drop the ref on the page properly, and now my reproducer no longer leaks the huge pages. [josef@toxicpanda.com: v2] Link: https://lkml.kernel.org/r/e90c8f0dbae836632b669c2afc434006a00d4a67.1657721478.git.josef@toxicpanda.com Link: https://lkml.kernel.org/r/2b798acfd95c9ab9395fe85e8d5a835e2e10a920.1657051137.git.josef@toxicpanda.com Fixes: f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths") Signed-off-by: Josef Bacik Signed-off-by: Rik van Riel Signed-off-by: Chris Mason Acked-by: Kirill A. Shutemov Cc: Matthew Wilcox (Oracle) Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 036eb765c4e808..a4d0f744a458d7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4071,9 +4071,12 @@ vm_fault_t finish_fault(struct vm_fault *vmf) } } - /* See comment in handle_pte_fault() */ + /* + * See comment in handle_pte_fault() for how this scenario happens, we + * need to return NOPAGE so that we drop this page. + */ if (pmd_devmap_trans_unstable(vmf->pmd)) - return 0; + return VM_FAULT_NOPAGE; vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); From dc124c849c72818b68c9eae9bf348ce605f70e9a Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 9 Jul 2022 17:26:29 +0800 Subject: [PATCH 0962/1056] hugetlb: fix memoryleak in hugetlb_mcopy_atomic_pte commit da9a298f5fad0dc615079a340da42928bc5b138e upstream. When alloc_huge_page fails, *pagep is set to NULL without put_page first. So the hugepage indicated by *pagep is leaked. Link: https://lkml.kernel.org/r/20220709092629.54291-1-linmiaohe@huawei.com Fixes: 8cc5fcbb5be8 ("mm, hugetlb: fix racy resv_huge_pages underflow on UFFDIO_COPY") Signed-off-by: Miaohe Lin Acked-by: Muchun Song Reviewed-by: Anshuman Khandual Reviewed-by: Baolin Wang Reviewed-by: Mike Kravetz Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index eed96302897aeb..405793b8cf0d2c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5314,6 +5314,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, page = alloc_huge_page(dst_vma, dst_addr, 0); if (IS_ERR(page)) { + put_page(*pagep); ret = -ENOMEM; *pagep = NULL; goto out; From 71f71150115ae24cfab8e803bc78f731b3f4cc59 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 22 Jul 2022 13:07:11 +0200 Subject: [PATCH 0963/1056] asm-generic: remove a broken and needless ifdef conditional commit e2a619ca0b38f2114347b7078b8a67d72d457a3d upstream. Commit 527701eda5f1 ("lib: Add a generic version of devmem_is_allowed()") introduces the config symbol GENERIC_LIB_DEVMEM_IS_ALLOWED, but then falsely refers to CONFIG_GENERIC_DEVMEM_IS_ALLOWED (note the missing LIB in the reference) in ./include/asm-generic/io.h. Luckily, ./scripts/checkkconfigsymbols.py warns on non-existing configs: GENERIC_DEVMEM_IS_ALLOWED Referencing files: include/asm-generic/io.h The actual fix, though, is simply to not to make this function declaration dependent on any kernel config. For architectures that intend to use the generic version, the arch's 'select GENERIC_LIB_DEVMEM_IS_ALLOWED' will lead to picking the function definition, and for other architectures, this function is simply defined elsewhere. The wrong '#ifndef' on a non-existing config symbol also always had the same effect (although more by mistake than by intent). So, there is no functional change. Remove this broken and needless ifdef conditional. Fixes: 527701eda5f1 ("lib: Add a generic version of devmem_is_allowed()") Signed-off-by: Lukas Bulwahn Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/io.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 7ce93aaf69f8db..98954dda573444 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -1125,9 +1125,7 @@ static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer, } #endif -#ifndef CONFIG_GENERIC_DEVMEM_IS_ALLOWED extern int devmem_is_allowed(unsigned long pfn); -#endif #endif /* __KERNEL__ */ From 8bd9747d30668606f0a5f92c2fa00bf88266a423 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 13 Jul 2022 15:17:21 +0200 Subject: [PATCH 0964/1056] s390/archrandom: prevent CPACF trng invocations in interrupt context commit 918e75f77af7d2e049bb70469ec0a2c12782d96a upstream. This patch slightly reworks the s390 arch_get_random_seed_{int,long} implementation: Make sure the CPACF trng instruction is never called in any interrupt context. This is done by adding an additional condition in_task(). Justification: There are some constrains to satisfy for the invocation of the arch_get_random_seed_{int,long}() functions: - They should provide good random data during kernel initialization. - They should not be called in interrupt context as the TRNG instruction is relatively heavy weight and may for example make some network loads cause to timeout and buck. However, it was not clear what kind of interrupt context is exactly encountered during kernel init or network traffic eventually calling arch_get_random_seed_long(). After some days of investigations it is clear that the s390 start_kernel function is not running in any interrupt context and so the trng is called: Jul 11 18:33:39 t35lp54 kernel: [<00000001064e90ca>] arch_get_random_seed_long.part.0+0x32/0x70 Jul 11 18:33:39 t35lp54 kernel: [<000000010715f246>] random_init+0xf6/0x238 Jul 11 18:33:39 t35lp54 kernel: [<000000010712545c>] start_kernel+0x4a4/0x628 Jul 11 18:33:39 t35lp54 kernel: [<000000010590402a>] startup_continue+0x2a/0x40 The condition in_task() is true and the CPACF trng provides random data during kernel startup. The network traffic however, is more difficult. A typical call stack looks like this: Jul 06 17:37:07 t35lp54 kernel: [<000000008b5600fc>] extract_entropy.constprop.0+0x23c/0x240 Jul 06 17:37:07 t35lp54 kernel: [<000000008b560136>] crng_reseed+0x36/0xd8 Jul 06 17:37:07 t35lp54 kernel: [<000000008b5604b8>] crng_make_state+0x78/0x340 Jul 06 17:37:07 t35lp54 kernel: [<000000008b5607e0>] _get_random_bytes+0x60/0xf8 Jul 06 17:37:07 t35lp54 kernel: [<000000008b56108a>] get_random_u32+0xda/0x248 Jul 06 17:37:07 t35lp54 kernel: [<000000008aefe7a8>] kfence_guarded_alloc+0x48/0x4b8 Jul 06 17:37:07 t35lp54 kernel: [<000000008aeff35e>] __kfence_alloc+0x18e/0x1b8 Jul 06 17:37:07 t35lp54 kernel: [<000000008aef7f10>] __kmalloc_node_track_caller+0x368/0x4d8 Jul 06 17:37:07 t35lp54 kernel: [<000000008b611eac>] kmalloc_reserve+0x44/0xa0 Jul 06 17:37:07 t35lp54 kernel: [<000000008b611f98>] __alloc_skb+0x90/0x178 Jul 06 17:37:07 t35lp54 kernel: [<000000008b6120dc>] __napi_alloc_skb+0x5c/0x118 Jul 06 17:37:07 t35lp54 kernel: [<000000008b8f06b4>] qeth_extract_skb+0x13c/0x680 Jul 06 17:37:07 t35lp54 kernel: [<000000008b8f6526>] qeth_poll+0x256/0x3f8 Jul 06 17:37:07 t35lp54 kernel: [<000000008b63d76e>] __napi_poll.constprop.0+0x46/0x2f8 Jul 06 17:37:07 t35lp54 kernel: [<000000008b63dbec>] net_rx_action+0x1cc/0x408 Jul 06 17:37:07 t35lp54 kernel: [<000000008b937302>] __do_softirq+0x132/0x6b0 Jul 06 17:37:07 t35lp54 kernel: [<000000008abf46ce>] __irq_exit_rcu+0x13e/0x170 Jul 06 17:37:07 t35lp54 kernel: [<000000008abf531a>] irq_exit_rcu+0x22/0x50 Jul 06 17:37:07 t35lp54 kernel: [<000000008b922506>] do_io_irq+0xe6/0x198 Jul 06 17:37:07 t35lp54 kernel: [<000000008b935826>] io_int_handler+0xd6/0x110 Jul 06 17:37:07 t35lp54 kernel: [<000000008b9358a6>] psw_idle_exit+0x0/0xa Jul 06 17:37:07 t35lp54 kernel: ([<000000008ab9c59a>] arch_cpu_idle+0x52/0xe0) Jul 06 17:37:07 t35lp54 kernel: [<000000008b933cfe>] default_idle_call+0x6e/0xd0 Jul 06 17:37:07 t35lp54 kernel: [<000000008ac59f4e>] do_idle+0xf6/0x1b0 Jul 06 17:37:07 t35lp54 kernel: [<000000008ac5a28e>] cpu_startup_entry+0x36/0x40 Jul 06 17:37:07 t35lp54 kernel: [<000000008abb0d90>] smp_start_secondary+0x148/0x158 Jul 06 17:37:07 t35lp54 kernel: [<000000008b935b9e>] restart_int_handler+0x6e/0x90 which confirms that the call is in softirq context. So in_task() covers exactly the cases where we want to have CPACF trng called: not in nmi, not in hard irq, not in soft irq but in normal task context and during kernel init. Signed-off-by: Harald Freudenberger Acked-by: Jason A. Donenfeld Reviewed-by: Juergen Christ Link: https://lore.kernel.org/r/20220713131721.257907-1-freude@linux.ibm.com Fixes: e4f74400308c ("s390/archrandom: simplify back to earlier design and initialize earlier") [agordeev@linux.ibm.com changed desc, added Fixes and Link, removed -stable] Signed-off-by: Alexander Gordeev Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/archrandom.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index 2c6e1c6ecbe780..4120c428dc378f 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -2,7 +2,7 @@ /* * Kernel interface for the s390 arch_random_* functions * - * Copyright IBM Corp. 2017, 2020 + * Copyright IBM Corp. 2017, 2022 * * Author: Harald Freudenberger * @@ -14,6 +14,7 @@ #ifdef CONFIG_ARCH_RANDOM #include +#include #include #include @@ -32,7 +33,8 @@ static inline bool __must_check arch_get_random_int(unsigned int *v) static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { - if (static_branch_likely(&s390_arch_random_available)) { + if (static_branch_likely(&s390_arch_random_available) && + in_task()) { cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); atomic64_add(sizeof(*v), &s390_arch_random_counter); return true; @@ -42,7 +44,8 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v) static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { - if (static_branch_likely(&s390_arch_random_available)) { + if (static_branch_likely(&s390_arch_random_available) && + in_task()) { cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); atomic64_add(sizeof(*v), &s390_arch_random_counter); return true; From 121c8993d4f37798880ca04b017decabc4d9da25 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Wed, 20 Jul 2022 16:27:45 +1000 Subject: [PATCH 0965/1056] nouveau/svm: Fix to migrate all requested pages commit 66cee9097e2b74ff3c8cc040ce5717c521a0c3fa upstream. Users may request that pages from an OpenCL SVM allocation be migrated to the GPU with clEnqueueSVMMigrateMem(). In Nouveau this will call into nouveau_dmem_migrate_vma() to do the migration. If the total range to be migrated exceeds SG_MAX_SINGLE_ALLOC the pages will be migrated in chunks of size SG_MAX_SINGLE_ALLOC. However a typo in updating the starting address means that only the first chunk will get migrated. Fix the calculation so that the entire range will get migrated if possible. Signed-off-by: Alistair Popple Fixes: e3d8b0890469 ("drm/nouveau/svm: map pages after migration") Reviewed-by: Ralph Campbell Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20220720062745.960701-1-apopple@nvidia.com Cc: # v5.8+ Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 92987daa5e17d0..5e72e6cb2f840b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -679,7 +679,11 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, goto out_free_dma; for (i = 0; i < npages; i += max) { - args.end = start + (max << PAGE_SHIFT); + if (args.start + (max << PAGE_SHIFT) > end) + args.end = end; + else + args.end = args.start + (max << PAGE_SHIFT); + ret = migrate_vma_setup(&args); if (ret) goto out_free_pfns; From 11c1cc3f6e42e238659b1b65d9a2f093991d5606 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 25 Jul 2022 16:36:29 -0700 Subject: [PATCH 0966/1056] drm/simpledrm: Fix return type of simpledrm_simple_display_pipe_mode_valid() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0c09bc33aa8e9dc867300acaadc318c2f0d85a1e upstream. When booting a kernel compiled with clang's CFI protection (CONFIG_CFI_CLANG), there is a CFI failure in drm_simple_kms_crtc_mode_valid() when trying to call simpledrm_simple_display_pipe_mode_valid() through ->mode_valid(): [ 0.322802] CFI failure (target: simpledrm_simple_display_pipe_mode_valid+0x0/0x8): ... [ 0.324928] Call trace: [ 0.324969] __ubsan_handle_cfi_check_fail+0x58/0x60 [ 0.325053] __cfi_check_fail+0x3c/0x44 [ 0.325120] __cfi_slowpath_diag+0x178/0x200 [ 0.325192] drm_simple_kms_crtc_mode_valid+0x58/0x80 [ 0.325279] __drm_helper_update_and_validate+0x31c/0x464 ... The ->mode_valid() member in 'struct drm_simple_display_pipe_funcs' expects a return type of 'enum drm_mode_status', not 'int'. Correct it to fix the CFI failure. Cc: stable@vger.kernel.org Fixes: 11e8f5fd223b ("drm: Add simpledrm driver") Link: https://github.com/ClangBuiltLinux/linux/issues/1647 Reported-by: Tomasz Paweł Gajc Signed-off-by: Nathan Chancellor Signed-off-by: Thomas Zimmermann Reviewed-by: Sami Tolvanen Link: https://patchwork.freedesktop.org/patch/msgid/20220725233629.223223-1-nathan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/tiny/simpledrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index 3e3f9ba1e8858d..f3c2c173ca4b5e 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -614,7 +614,7 @@ static const struct drm_connector_funcs simpledrm_connector_funcs = { .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; -static int +static enum drm_mode_status simpledrm_simple_display_pipe_mode_valid(struct drm_simple_display_pipe *pipe, const struct drm_display_mode *mode) { From 093610f216d0722d375af048fb1c0e565003407f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 28 Jul 2022 10:31:06 +0100 Subject: [PATCH 0967/1056] watch_queue: Fix missing rcu annotation commit e0339f036ef4beb9b20f0b6532a1e0ece7f594c6 upstream. Since __post_watch_notification() walks wlist->watchers with only the RCU read lock held, we need to use RCU methods to add to the list (we already use RCU methods to remove from the list). Fix add_watch_to_object() to use hlist_add_head_rcu() instead of hlist_add_head() for that list. Fixes: c73be61cede5 ("pipe: Add general notification queue support") Signed-off-by: David Howells Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/watch_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index debebcd2664ef1..7019d337ce861c 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -497,7 +497,7 @@ int add_watch_to_object(struct watch *watch, struct watch_list *wlist) unlock_wqueue(wqueue); } - hlist_add_head(&watch->list_node, &wlist->watchers); + hlist_add_head_rcu(&watch->list_node, &wlist->watchers); return 0; } EXPORT_SYMBOL(add_watch_to_object); From c9c01dd38975c70e3ec9a4b95c1c9ab66989589c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 28 Jul 2022 10:31:12 +0100 Subject: [PATCH 0968/1056] watch_queue: Fix missing locking in add_watch_to_object() commit e64ab2dbd882933b65cd82ff6235d705ad65dbb6 upstream. If a watch is being added to a queue, it needs to guard against interference from addition of a new watch, manual removal of a watch and removal of a watch due to some other queue being destroyed. KEYCTL_WATCH_KEY guards against this for the same {key,queue} pair by holding the key->sem writelocked and by holding refs on both the key and the queue - but that doesn't prevent interaction from other {key,queue} pairs. While add_watch_to_object() does take the spinlock on the event queue, it doesn't take the lock on the source's watch list. The assumption was that the caller would prevent that (say by taking key->sem) - but that doesn't prevent interference from the destruction of another queue. Fix this by locking the watcher list in add_watch_to_object(). Fixes: c73be61cede5 ("pipe: Add general notification queue support") Reported-by: syzbot+03d7b43290037d1f87ca@syzkaller.appspotmail.com Signed-off-by: David Howells cc: keyrings@vger.kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/watch_queue.c | 58 +++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index 7019d337ce861c..1059ef6c3711ad 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -457,6 +457,33 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue) rcu_assign_pointer(watch->queue, wqueue); } +static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue) +{ + const struct cred *cred; + struct watch *w; + + hlist_for_each_entry(w, &wlist->watchers, list_node) { + struct watch_queue *wq = rcu_access_pointer(w->queue); + if (wqueue == wq && watch->id == w->id) + return -EBUSY; + } + + cred = current_cred(); + if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) { + atomic_dec(&cred->user->nr_watches); + return -EAGAIN; + } + + watch->cred = get_cred(cred); + rcu_assign_pointer(watch->watch_list, wlist); + + kref_get(&wqueue->usage); + kref_get(&watch->usage); + hlist_add_head(&watch->queue_node, &wqueue->watches); + hlist_add_head_rcu(&watch->list_node, &wlist->watchers); + return 0; +} + /** * add_watch_to_object - Add a watch on an object to a watch list * @watch: The watch to add @@ -471,34 +498,21 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue) */ int add_watch_to_object(struct watch *watch, struct watch_list *wlist) { - struct watch_queue *wqueue = rcu_access_pointer(watch->queue); - struct watch *w; - - hlist_for_each_entry(w, &wlist->watchers, list_node) { - struct watch_queue *wq = rcu_access_pointer(w->queue); - if (wqueue == wq && watch->id == w->id) - return -EBUSY; - } - - watch->cred = get_current_cred(); - rcu_assign_pointer(watch->watch_list, wlist); + struct watch_queue *wqueue; + int ret = -ENOENT; - if (atomic_inc_return(&watch->cred->user->nr_watches) > - task_rlimit(current, RLIMIT_NOFILE)) { - atomic_dec(&watch->cred->user->nr_watches); - put_cred(watch->cred); - return -EAGAIN; - } + rcu_read_lock(); + wqueue = rcu_access_pointer(watch->queue); if (lock_wqueue(wqueue)) { - kref_get(&wqueue->usage); - kref_get(&watch->usage); - hlist_add_head(&watch->queue_node, &wqueue->watches); + spin_lock(&wlist->lock); + ret = add_one_watch(watch, wlist, wqueue); + spin_unlock(&wlist->lock); unlock_wqueue(wqueue); } - hlist_add_head_rcu(&watch->list_node, &wlist->watchers); - return 0; + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(add_watch_to_object); From 807b028115eb155f6e651ef14a57f36952b654e1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:12 -0700 Subject: [PATCH 0969/1056] tcp: Fix data-races around sysctl_tcp_dsack. commit 58ebb1c8b35a8ef38cd6927431e0fa7b173a632d upstream. While reading sysctl_tcp_dsack, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2d21d8bf3b8cc0..e066c527a723fa 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4419,7 +4419,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4466,7 +4466,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); - if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; tcp_rcv_spurious_retrans(sk, skb); From 74753ec663d744a3b62bb19dcc2217735fff87c1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:13 -0700 Subject: [PATCH 0970/1056] tcp: Fix a data-race around sysctl_tcp_app_win. commit 02ca527ac5581cf56749db9fd03d854e842253dd upstream. While reading sysctl_tcp_app_win, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e066c527a723fa..1c940517f5f50d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -526,7 +526,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb, */ static void tcp_init_buffer_space(struct sock *sk) { - int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win; + int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win); struct tcp_sock *tp = tcp_sk(sk); int maxwin; From 0d8fa3c2a442cc0dded3708f081c8e95b1a94622 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:14 -0700 Subject: [PATCH 0971/1056] tcp: Fix a data-race around sysctl_tcp_adv_win_scale. commit 36eeee75ef0157e42fb6593dcc65daab289b559e upstream. While reading sysctl_tcp_adv_win_scale, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 8ce8aafeef0f55..76b0d7f2b967fb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1406,7 +1406,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, static inline int tcp_win_from_space(const struct sock *sk, int space) { - int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; + int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale); return tcp_adv_win_scale <= 0 ? (space>>(-tcp_adv_win_scale)) : From 6e167ed68999487347441457fe2bed81a2d90905 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:15 -0700 Subject: [PATCH 0972/1056] tcp: Fix a data-race around sysctl_tcp_frto. commit 706c6202a3589f290e1ef9be0584a8f4a3cc0507 upstream. While reading sysctl_tcp_frto, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1c940517f5f50d..b9fd51826aea61 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2167,7 +2167,7 @@ void tcp_enter_loss(struct sock *sk) * loss recovery is underway except recurring timeout(s) on * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing */ - tp->frto = net->ipv4.sysctl_tcp_frto && + tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) && (new_recovery || icsk->icsk_retransmits) && !inet_csk(sk)->icsk_mtup.probe_size; } From aa2ca5b5629dad5c3a238550464c0b731930741d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:16 -0700 Subject: [PATCH 0973/1056] tcp: Fix a data-race around sysctl_tcp_nometrics_save. commit 8499a2454d9e8a55ce616ede9f9580f36fd5b0f3 upstream. While reading sysctl_tcp_nometrics_save, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_metrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index a501150deaa3b3..9dcc418a26f298 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk) int m; sk_dst_confirm(sk); - if (net->ipv4.sysctl_tcp_nometrics_save || !dst) + if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst) return; rcu_read_lock(); From 62e721dee8ccf70c7652c8424f6f9fa5874c924d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:17 -0700 Subject: [PATCH 0974/1056] tcp: Fix data-races around sysctl_tcp_no_ssthresh_metrics_save. commit ab1ba21b523ab496b1a4a8e396333b24b0a18f9a upstream. While reading sysctl_tcp_no_ssthresh_metrics_save, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 65e6d90168f3 ("net-tcp: Disable TCP ssthresh metrics cache by default") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_metrics.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 9dcc418a26f298..d58e672be31c76 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk) if (tcp_in_initial_slowstart(tp)) { /* Slow start still did not finish. */ - if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && + if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val && (tcp_snd_cwnd(tp) >> 1) > val) @@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk) } else if (!tcp_in_slow_start(tp) && icsk->icsk_ca_state == TCP_CA_Open) { /* Cong. avoidance phase, cwnd is reliable. */ - if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && + if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) tcp_metric_set(tm, TCP_METRIC_SSTHRESH, max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh)); @@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk) tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_ssthresh) >> 1); } - if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && + if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val && tp->snd_ssthresh > val) @@ -463,7 +463,7 @@ void tcp_init_metrics(struct sock *sk) if (tcp_metric_locked(tm, TCP_METRIC_CWND)) tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND); - val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ? + val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ? 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val) { tp->snd_ssthresh = val; From cef4c1d0fb48d59211476b960bdd722ff76d0746 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 7 Jul 2022 12:20:42 +0200 Subject: [PATCH 0975/1056] ice: check (DD | EOF) bits on Rx descriptor rather than (EOP | RS) commit 283d736ff7c7e96ac5b32c6c0de40372f8eb171e upstream. Tx side sets EOP and RS bits on descriptors to indicate that a particular descriptor is the last one and needs to generate an irq when it was sent. These bits should not be checked on completion path regardless whether it's the Tx or the Rx. DD bit serves this purpose and it indicates that a particular descriptor is either for Rx or was successfully Txed. EOF is also set as loopback test does not xmit fragmented frames. Look at (DD | EOF) bits setting in ice_lbtest_receive_frames() instead of EOP and RS pair. Fixes: 0e674aeb0b77 ("ice: Add handler for ethtool selftest") Signed-off-by: Maciej Fijalkowski Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 982db894754f7e..9b9c2b88548660 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -651,7 +651,8 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring) rx_desc = ICE_RX_DESC(rx_ring, i); if (!(rx_desc->wb.status_error0 & - cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS))) + (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) | + cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S))))) continue; rx_buf = &rx_ring->rx_buf[i]; From 7f0a36506809fc54d3b70aa2544c03e03b289364 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 7 Jul 2022 12:20:43 +0200 Subject: [PATCH 0976/1056] ice: do not setup vlan for loopback VSI commit cc019545a238518fa9da1e2a889f6e1bb1005a63 upstream. Currently loopback test is failiing due to the error returned from ice_vsi_vlan_setup(). Skip calling it when preparing loopback VSI. Fixes: 0e674aeb0b77 ("ice: Add handler for ethtool selftest") Signed-off-by: Maciej Fijalkowski Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ice/ice_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 188abf36a5b23d..b9d45c7dbef18e 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5481,10 +5481,12 @@ int ice_vsi_cfg(struct ice_vsi *vsi) if (vsi->netdev) { ice_set_rx_mode(vsi->netdev); - err = ice_vsi_vlan_setup(vsi); + if (vsi->type != ICE_VSI_LB) { + err = ice_vsi_vlan_setup(vsi); - if (err) - return err; + if (err) + return err; + } } ice_vsi_cfg_dcb_rings(vsi); From bdaf56e0df15c0a13d9b14580c478fd7463f9e45 Mon Sep 17 00:00:00 2001 From: Liang He Date: Tue, 19 Jul 2022 15:15:29 +0800 Subject: [PATCH 0977/1056] scsi: ufs: host: Hold reference returned by of_parse_phandle() commit a3435afba87dc6cd83f5595e7607f3c40f93ef01 upstream. In ufshcd_populate_vreg(), we should hold the reference returned by of_parse_phandle() and then use it to call of_node_put() for refcount balance. Link: https://lore.kernel.org/r/20220719071529.1081166-1-windhl@126.com Fixes: aa4976130934 ("ufs: Add regulator enable support") Reviewed-by: Bart Van Assche Signed-off-by: Liang He Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufshcd-pltfrm.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c index 87975d1a21c8b1..adc302b1a57aee 100644 --- a/drivers/scsi/ufs/ufshcd-pltfrm.c +++ b/drivers/scsi/ufs/ufshcd-pltfrm.c @@ -107,9 +107,20 @@ static int ufshcd_parse_clock_info(struct ufs_hba *hba) return ret; } +static bool phandle_exists(const struct device_node *np, + const char *phandle_name, int index) +{ + struct device_node *parse_np = of_parse_phandle(np, phandle_name, index); + + if (parse_np) + of_node_put(parse_np); + + return parse_np != NULL; +} + #define MAX_PROP_SIZE 32 static int ufshcd_populate_vreg(struct device *dev, const char *name, - struct ufs_vreg **out_vreg) + struct ufs_vreg **out_vreg) { char prop_name[MAX_PROP_SIZE]; struct ufs_vreg *vreg = NULL; @@ -121,7 +132,7 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name, } snprintf(prop_name, MAX_PROP_SIZE, "%s-supply", name); - if (!of_parse_phandle(np, prop_name, 0)) { + if (!phandle_exists(np, prop_name, 0)) { dev_info(dev, "%s: Unable to find %s regulator, assuming enabled\n", __func__, prop_name); goto out; From 927c5cf0ba3e15d601bcc5cc0ee71f2422754b61 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 21 Jul 2022 20:44:04 +0000 Subject: [PATCH 0978/1056] Revert "tcp: change pingpong threshold to 3" commit 4d8f24eeedc58d5f87b650ddda73c16e8ba56559 upstream. This reverts commit 4a41f453bedfd5e9cd040bad509d9da49feb3e2c. This to-be-reverted commit was meant to apply a stricter rule for the stack to enter pingpong mode. However, the condition used to check for interactive session "before(tp->lsndtime, icsk->icsk_ack.lrcvtime)" is jiffy based and might be too coarse, which delays the stack entering pingpong mode. We revert this patch so that we no longer use the above condition to determine interactive session, and also reduce pingpong threshold to 1. Fixes: 4a41f453bedf ("tcp: change pingpong threshold to 3") Reported-by: LemmyHuang Suggested-by: Neal Cardwell Signed-off-by: Wei Wang Acked-by: Neal Cardwell Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20220721204404.388396-1-weiwan@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/net/inet_connection_sock.h | 10 +--------- net/ipv4/tcp_output.c | 15 ++++++--------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index fa6a87246a7b85..695ed45841f06d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -315,7 +315,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); -#define TCP_PINGPONG_THRESH 3 +#define TCP_PINGPONG_THRESH 1 static inline void inet_csk_enter_pingpong_mode(struct sock *sk) { @@ -332,14 +332,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk) return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; } -static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - if (icsk->icsk_ack.pingpong < U8_MAX) - icsk->icsk_ack.pingpong++; -} - static inline bool inet_csk_has_ulp(struct sock *sk) { return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index caf9283f9b0f94..43ecbd79663585 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -167,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp, if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); - /* If this is the first data packet sent in response to the - * previous received data, - * and it is a reply for ato after last received packet, - * increase pingpong count. - */ - if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) && - (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - inet_csk_inc_pingpong_cnt(sk); - tp->lsndtime = now; + + /* If it is a reply for ato after last received + * packet, enter pingpong mode. + */ + if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) + inet_csk_enter_pingpong_mode(sk); } /* Account for an ACK we sent. */ From 59e2332846d8919d90e299e19a87045dd63e3ef0 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sun, 24 Jul 2022 13:51:14 +0530 Subject: [PATCH 0979/1056] octeontx2-pf: Fix UDP/TCP src and dst port tc filters commit 59e1be6f83b928a04189bbf3ab683a1fc6248db3 upstream. Check the mask for non-zero value before installing tc filters for L4 source and destination ports. Otherwise installing a filter for source port installs destination port too and vice-versa. Fixes: 1d4d9e42c240 ("octeontx2-pf: Add tc flower hardware offload on ingress traffic") Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- .../ethernet/marvell/octeontx2/nic/otx2_tc.c | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 626961a41089d0..5bcefc06ce5c46 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -571,21 +571,27 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node, flow_spec->dport = match.key->dst; flow_mask->dport = match.mask->dst; - if (ip_proto == IPPROTO_UDP) - req->features |= BIT_ULL(NPC_DPORT_UDP); - else if (ip_proto == IPPROTO_TCP) - req->features |= BIT_ULL(NPC_DPORT_TCP); - else if (ip_proto == IPPROTO_SCTP) - req->features |= BIT_ULL(NPC_DPORT_SCTP); + + if (flow_mask->dport) { + if (ip_proto == IPPROTO_UDP) + req->features |= BIT_ULL(NPC_DPORT_UDP); + else if (ip_proto == IPPROTO_TCP) + req->features |= BIT_ULL(NPC_DPORT_TCP); + else if (ip_proto == IPPROTO_SCTP) + req->features |= BIT_ULL(NPC_DPORT_SCTP); + } flow_spec->sport = match.key->src; flow_mask->sport = match.mask->src; - if (ip_proto == IPPROTO_UDP) - req->features |= BIT_ULL(NPC_SPORT_UDP); - else if (ip_proto == IPPROTO_TCP) - req->features |= BIT_ULL(NPC_SPORT_TCP); - else if (ip_proto == IPPROTO_SCTP) - req->features |= BIT_ULL(NPC_SPORT_SCTP); + + if (flow_mask->sport) { + if (ip_proto == IPPROTO_UDP) + req->features |= BIT_ULL(NPC_SPORT_UDP); + else if (ip_proto == IPPROTO_TCP) + req->features |= BIT_ULL(NPC_SPORT_TCP); + else if (ip_proto == IPPROTO_SCTP) + req->features |= BIT_ULL(NPC_SPORT_SCTP); + } } return otx2_tc_parse_actions(nic, &rule->action, req, f, node); From 664a3311e6716c2361cf64a38a00a0ef8cb78b38 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:18 -0700 Subject: [PATCH 0980/1056] tcp: Fix data-races around sysctl_tcp_moderate_rcvbuf. commit 780476488844e070580bfc9e3bc7832ec1cea883 upstream. While reading sysctl_tcp_moderate_rcvbuf, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 2 +- net/mptcp/protocol.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b9fd51826aea61..426f8fe028503e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -716,7 +716,7 @@ void tcp_rcv_space_adjust(struct sock *sk) * */ - if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { int rcvmem, rcvbuf; u64 rcvwin, grow; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index d6def23b8cba0c..01ede89e3c46cb 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1881,7 +1881,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) if (msk->rcvq_space.copied <= msk->rcvq_space.space) goto new_measure; - if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { int rcvmem, rcvbuf; u64 rcvwin, grow; From a88de75673e4f898564f90feb61e5d625e03c4c4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:20 -0700 Subject: [PATCH 0981/1056] tcp: Fix a data-race around sysctl_tcp_limit_output_bytes. commit 9fb90193fbd66b4c5409ef729fd081861f8b6351 upstream. While reading sysctl_tcp_limit_output_bytes, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 46d3ceabd8d9 ("tcp: TCP Small Queues") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 43ecbd79663585..4389ad7e4c8ca1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2503,7 +2503,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift)); if (sk->sk_pacing_status == SK_PACING_NONE) limit = min_t(unsigned long, limit, - sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes)); limit <<= factor; if (static_branch_unlikely(&tcp_tx_delay_enabled) && From ff2932ac8ee1ce6f66ba1b5017843c87492cd1a8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:21 -0700 Subject: [PATCH 0982/1056] tcp: Fix a data-race around sysctl_tcp_challenge_ack_limit. commit db3815a2fa691da145cfbe834584f31ad75df9ff upstream. While reading sysctl_tcp_challenge_ack_limit, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 282f23c6ee34 ("tcp: implement RFC 5961 3.2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 426f8fe028503e..a5357ebfbcc0fa 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3622,7 +3622,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) /* Then check host-wide RFC 5961 rate limit. */ now = jiffies / HZ; if (now != challenge_timestamp) { - u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit; + u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit); u32 half = (ack_limit + 1) >> 1; challenge_timestamp = now; From 3d3e41069b65749ad87e00036574c4d7bbedd191 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 20 Jul 2022 10:51:20 +0800 Subject: [PATCH 0983/1056] scsi: core: Fix warning in scsi_alloc_sgtables() commit d9a434fa0c12ed5f7afe1e9dd30003ab5d059b85 upstream. As explained in SG_IO howto[1]: "If iovec_count is non-zero then 'dxfer_len' should be equal to the sum of iov_len lengths. If not, the minimum of the two is the transfer length." When iovec_count is non-zero and dxfer_len is zero, the sg_io() just genarated a null bio, and finally caused a warning below. To fix it, skip generating a bio for this request if dxfer_len is zero. [1] https://tldp.org/HOWTO/SCSI-Generic-HOWTO/x198.html WARNING: CPU: 2 PID: 3643 at drivers/scsi/scsi_lib.c:1032 scsi_alloc_sgtables+0xc7d/0xf70 drivers/scsi/scsi_lib.c:1032 Modules linked in: CPU: 2 PID: 3643 Comm: syz-executor397 Not tainted 5.17.0-rc3-syzkaller-00316-gb81b1829e7e3 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-204/01/2014 RIP: 0010:scsi_alloc_sgtables+0xc7d/0xf70 drivers/scsi/scsi_lib.c:1032 Code: e7 fc 31 ff 44 89 f6 e8 c1 4e e7 fc 45 85 f6 0f 84 1a f5 ff ff e8 93 4c e7 fc 83 c5 01 0f b7 ed e9 0f f5 ff ff e8 83 4c e7 fc <0f> 0b 41 bc 0a 00 00 00 e9 2b fb ff ff 41 bc 09 00 00 00 e9 20 fb RSP: 0018:ffffc90000d07558 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff88801bfc96a0 RCX: 0000000000000000 RDX: ffff88801c876000 RSI: ffffffff849060bd RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff849055b9 R11: 0000000000000000 R12: ffff888012b8c000 R13: ffff88801bfc9580 R14: 0000000000000000 R15: ffff88801432c000 FS: 00007effdec8e700(0000) GS:ffff88802cc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007effdec6d718 CR3: 00000000206d6000 CR4: 0000000000150ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: scsi_setup_scsi_cmnd drivers/scsi/scsi_lib.c:1219 [inline] scsi_prepare_cmd drivers/scsi/scsi_lib.c:1614 [inline] scsi_queue_rq+0x283e/0x3630 drivers/scsi/scsi_lib.c:1730 blk_mq_dispatch_rq_list+0x6ea/0x22e0 block/blk-mq.c:1851 __blk_mq_sched_dispatch_requests+0x20b/0x410 block/blk-mq-sched.c:299 blk_mq_sched_dispatch_requests+0xfb/0x180 block/blk-mq-sched.c:332 __blk_mq_run_hw_queue+0xf9/0x350 block/blk-mq.c:1968 __blk_mq_delay_run_hw_queue+0x5b6/0x6c0 block/blk-mq.c:2045 blk_mq_run_hw_queue+0x30f/0x480 block/blk-mq.c:2096 blk_mq_sched_insert_request+0x340/0x440 block/blk-mq-sched.c:451 blk_execute_rq+0xcc/0x340 block/blk-mq.c:1231 sg_io+0x67c/0x1210 drivers/scsi/scsi_ioctl.c:485 scsi_ioctl_sg_io drivers/scsi/scsi_ioctl.c:866 [inline] scsi_ioctl+0xa66/0x1560 drivers/scsi/scsi_ioctl.c:921 sd_ioctl+0x199/0x2a0 drivers/scsi/sd.c:1576 blkdev_ioctl+0x37a/0x800 block/ioctl.c:588 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] __se_sys_ioctl fs/ioctl.c:860 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7effdecdc5d9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 81 14 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007effdec8e2f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007effded664c0 RCX: 00007effdecdc5d9 RDX: 0000000020002300 RSI: 0000000000002285 RDI: 0000000000000004 RBP: 00007effded34034 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000003 R13: 00007effded34054 R14: 2f30656c69662f2e R15: 00007effded664c8 Link: https://lore.kernel.org/r/20220720025120.3226770-1-yanaijie@huawei.com Fixes: 25636e282fe9 ("block: fix SG_IO vector request data length handling") Reported-by: syzbot+d44b35ecfb807e5af0b5@syzkaller.appspotmail.com Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c index a06c61f22742e5..6e2f82152b4a12 100644 --- a/drivers/scsi/scsi_ioctl.c +++ b/drivers/scsi/scsi_ioctl.c @@ -457,7 +457,7 @@ static int sg_io(struct scsi_device *sdev, struct gendisk *disk, goto out_free_cdb; ret = 0; - if (hdr->iovec_count) { + if (hdr->iovec_count && hdr->dxfer_len) { struct iov_iter i; struct iovec *iov = NULL; From 6a4a1c70e4460c824eefd038af6787c87851fad3 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Fri, 22 Jul 2022 10:24:48 -0400 Subject: [PATCH 0984/1056] scsi: mpt3sas: Stop fw fault watchdog work item during system shutdown commit 0fde22c5420ed258ee538a760291c2f3935f6a01 upstream. During system shutdown or reboot, mpt3sas will reset the firmware back to ready state. However, the driver leaves running a watchdog work item intended to keep the firmware in operational state. This causes a second, unneeded reset on shutdown and moves the firmware back to operational instead of in ready state as intended. And if the mpt3sas_fwfault_debug module parameter is set, this extra reset also panics the system. mpt3sas's scsih_shutdown needs to stop the watchdog before resetting the firmware back to ready state. Link: https://lore.kernel.org/r/20220722142448.6289-1-djeffery@redhat.com Fixes: fae21608c31c ("scsi: mpt3sas: Transition IOC to Ready state during shutdown") Tested-by: Laurence Oberman Acked-by: Sreekanth Reddy Signed-off-by: David Jeffery Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index af275ac4279541..5351959fbaba33 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -11386,6 +11386,7 @@ scsih_shutdown(struct pci_dev *pdev) _scsih_ir_shutdown(ioc); _scsih_nvme_shutdown(ioc); mpt3sas_base_mask_interrupts(ioc); + mpt3sas_base_stop_watchdog(ioc); ioc->shost_recovery = 1; mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET); ioc->shost_recovery = 0; From 4845d3ef64451dac12e108b3c80c0a34cc77f206 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 27 Jul 2022 18:22:20 -0700 Subject: [PATCH 0985/1056] net: ping6: Fix memleak in ipv6_renew_options(). commit e27326009a3d247b831eda38878c777f6f4eb3d1 upstream. When we close ping6 sockets, some resources are left unfreed because pingv6_prot is missing sk->sk_prot->destroy(). As reported by syzbot [0], just three syscalls leak 96 bytes and easily cause OOM. struct ipv6_sr_hdr *hdr; char data[24] = {0}; int fd; hdr = (struct ipv6_sr_hdr *)data; hdr->hdrlen = 2; hdr->type = IPV6_SRCRT_TYPE_4; fd = socket(AF_INET6, SOCK_DGRAM, NEXTHDR_ICMP); setsockopt(fd, IPPROTO_IPV6, IPV6_RTHDR, data, 24); close(fd); To fix memory leaks, let's add a destroy function. Note the socket() syscall checks if the GID is within the range of net.ipv4.ping_group_range. The default value is [1, 0] so that no GID meets the condition (1 <= GID <= 0). Thus, the local DoS does not succeed until we change the default value. However, at least Ubuntu/Fedora/RHEL loosen it. $ cat /usr/lib/sysctl.d/50-default.conf ... -net.ipv4.ping_group_range = 0 2147483647 Also, there could be another path reported with these options, and some of them require CAP_NET_RAW. setsockopt IPV6_ADDRFORM (inet6_sk(sk)->pktoptions) IPV6_RECVPATHMTU (inet6_sk(sk)->rxpmtu) IPV6_HOPOPTS (inet6_sk(sk)->opt) IPV6_RTHDRDSTOPTS (inet6_sk(sk)->opt) IPV6_RTHDR (inet6_sk(sk)->opt) IPV6_DSTOPTS (inet6_sk(sk)->opt) IPV6_2292PKTOPTIONS (inet6_sk(sk)->opt) getsockopt IPV6_FLOWLABEL_MGR (inet6_sk(sk)->ipv6_fl_list) For the record, I left a different splat with syzbot's one. unreferenced object 0xffff888006270c60 (size 96): comm "repro2", pid 231, jiffies 4294696626 (age 13.118s) hex dump (first 32 bytes): 01 00 00 00 44 00 00 00 00 00 00 00 00 00 00 00 ....D........... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000f6bc7ea9>] sock_kmalloc (net/core/sock.c:2564 net/core/sock.c:2554) [<000000006d699550>] do_ipv6_setsockopt.constprop.0 (net/ipv6/ipv6_sockglue.c:715) [<00000000c3c3b1f5>] ipv6_setsockopt (net/ipv6/ipv6_sockglue.c:1024) [<000000007096a025>] __sys_setsockopt (net/socket.c:2254) [<000000003a8ff47b>] __x64_sys_setsockopt (net/socket.c:2265 net/socket.c:2262 net/socket.c:2262) [<000000007c409dcb>] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) [<00000000e939c4a9>] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) [0]: https://syzkaller.appspot.com/bug?extid=a8430774139ec3ab7176 Fixes: 6d0bfe226116 ("net: ipv6: Add IPv6 support to the ping socket.") Reported-by: syzbot+a8430774139ec3ab7176@syzkaller.appspotmail.com Reported-by: Ayushman Dutta Signed-off-by: Kuniyuki Iwashima Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20220728012220.46918-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ping.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 6ac88fe24a8e07..135e3a060caa88 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -22,6 +22,11 @@ #include #include +static void ping_v6_destroy(struct sock *sk) +{ + inet6_destroy_sock(sk); +} + /* Compatibility glue so we can support IPv6 when it's compiled as a module */ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) @@ -166,6 +171,7 @@ struct proto pingv6_prot = { .owner = THIS_MODULE, .init = ping_init_sock, .close = ping_close, + .destroy = ping_v6_destroy, .connect = ip6_datagram_connect_v6_only, .disconnect = __udp_disconnect, .setsockopt = ipv6_setsockopt, From 189e370b8250d9c07091f9cf58b6618945c1ec71 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Thu, 28 Jul 2022 09:33:07 +0800 Subject: [PATCH 0986/1056] ipv6/addrconf: fix a null-ptr-deref bug for ip6_ptr commit 85f0173df35e5462d89947135a6a5599c6c3ef6f upstream. Change net device's MTU to smaller than IPV6_MIN_MTU or unregister device while matching route. That may trigger null-ptr-deref bug for ip6_ptr probability as following. ========================================================= BUG: KASAN: null-ptr-deref in find_match.part.0+0x70/0x134 Read of size 4 at addr 0000000000000308 by task ping6/263 CPU: 2 PID: 263 Comm: ping6 Not tainted 5.19.0-rc7+ #14 Call trace: dump_backtrace+0x1a8/0x230 show_stack+0x20/0x70 dump_stack_lvl+0x68/0x84 print_report+0xc4/0x120 kasan_report+0x84/0x120 __asan_load4+0x94/0xd0 find_match.part.0+0x70/0x134 __find_rr_leaf+0x408/0x470 fib6_table_lookup+0x264/0x540 ip6_pol_route+0xf4/0x260 ip6_pol_route_output+0x58/0x70 fib6_rule_lookup+0x1a8/0x330 ip6_route_output_flags_noref+0xd8/0x1a0 ip6_route_output_flags+0x58/0x160 ip6_dst_lookup_tail+0x5b4/0x85c ip6_dst_lookup_flow+0x98/0x120 rawv6_sendmsg+0x49c/0xc70 inet_sendmsg+0x68/0x94 Reproducer as following: Firstly, prepare conditions: $ip netns add ns1 $ip netns add ns2 $ip link add veth1 type veth peer name veth2 $ip link set veth1 netns ns1 $ip link set veth2 netns ns2 $ip netns exec ns1 ip -6 addr add 2001:0db8:0:f101::1/64 dev veth1 $ip netns exec ns2 ip -6 addr add 2001:0db8:0:f101::2/64 dev veth2 $ip netns exec ns1 ifconfig veth1 up $ip netns exec ns2 ifconfig veth2 up $ip netns exec ns1 ip -6 route add 2000::/64 dev veth1 metric 1 $ip netns exec ns2 ip -6 route add 2001::/64 dev veth2 metric 1 Secondly, execute the following two commands in two ssh windows respectively: $ip netns exec ns1 sh $while true; do ip -6 addr add 2001:0db8:0:f101::1/64 dev veth1; ip -6 route add 2000::/64 dev veth1 metric 1; ping6 2000::2; done $ip netns exec ns1 sh $while true; do ip link set veth1 mtu 1000; ip link set veth1 mtu 1500; sleep 5; done It is because ip6_ptr has been assigned to NULL in addrconf_ifdown() firstly, then ip6_ignore_linkdown() accesses ip6_ptr directly without NULL check. cpu0 cpu1 fib6_table_lookup __find_rr_leaf addrconf_notify [ NETDEV_CHANGEMTU ] addrconf_ifdown RCU_INIT_POINTER(dev->ip6_ptr, NULL) find_match ip6_ignore_linkdown So we can add NULL check for ip6_ptr before using in ip6_ignore_linkdown() to fix the null-ptr-deref bug. Fixes: dcd1f572954f ("net/ipv6: Remove fib6_idev") Signed-off-by: Ziyang Xuan Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220728013307.656257-1-william.xuanziyang@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/net/addrconf.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 59940e230b782f..53627afab1044b 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -403,6 +403,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev) { const struct inet6_dev *idev = __in6_dev_get(dev); + if (unlikely(!idev)) + return true; + return !!idev->cnf.ignore_routes_with_linkdown; } From ad6d6ae4a34c0957355886e0754c7b8cf7c47f56 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 21 Jul 2022 12:11:27 +0300 Subject: [PATCH 0987/1056] net/tls: Remove the context from the list in tls_device_down commit f6336724a4d4220c89a4ec38bca84b03b178b1a3 upstream. tls_device_down takes a reference on all contexts it's going to move to the degraded state (software fallback). If sk_destruct runs afterwards, it can reduce the reference counter back to 1 and return early without destroying the context. Then tls_device_down will release the reference it took and call tls_device_free_ctx. However, the context will still stay in tls_device_down_list forever. The list will contain an item, memory for which is released, making a memory corruption possible. Fix the above bug by properly removing the context from all lists before any call to tls_device_free_ctx. Fixes: 3740651bf7e2 ("tls: Fix context leak on tls_device_down") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_device.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 4e33150cfb9e1c..cf75969375cfae 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1351,8 +1351,13 @@ static int tls_device_down(struct net_device *netdev) * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW. * Now release the ref taken above. */ - if (refcount_dec_and_test(&ctx->refcount)) + if (refcount_dec_and_test(&ctx->refcount)) { + /* sk_destruct ran after tls_device_down took a ref, and + * it returned early. Complete the destruction here. + */ + list_del(&ctx->list); tls_device_free_ctx(ctx); + } } up_write(&device_offload_lock); From c721324afc589f8ea54bae04756b150aeaae5fa4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:44 -0700 Subject: [PATCH 0988/1056] igmp: Fix data-races around sysctl_igmp_qrv. [ Upstream commit 8ebcc62c738f68688ee7c6fec2efe5bc6d3d7e60 ] While reading sysctl_igmp_qrv, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. This test can be packed into a helper, so such changes will be in the follow-up series after net is merged into net-next. qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); Fixes: a9fe8e29945d ("ipv4: implement igmp_qrv sysctl to tune igmp robustness variable") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/igmp.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 9f4674244aff33..e07d10b2c48684 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -827,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev) struct net *net = dev_net(in_dev->dev); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv); + WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv)); igmp_ifc_start_timer(in_dev, 1); } @@ -1009,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * received value was zero, use the default or statically * configured value. */ - in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv; + in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; /* RFC3376, 8.3. Query Response Interval: @@ -1189,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im, pmc->interface = im->interface; in_dev_hold(in_dev); pmc->multiaddr = im->multiaddr; - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); pmc->sfmode = im->sfmode; if (pmc->sfmode == MCAST_INCLUDE) { struct ip_sf_list *psf; @@ -1240,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) swap(im->tomb, pmc->tomb); swap(im->sources, pmc->sources); for (psf = im->sources; psf; psf = psf->sf_next) - psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + psf->sf_crcount = in_dev->mr_qrv ?: + READ_ONCE(net->ipv4.sysctl_igmp_qrv); } else { - im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + im->crcount = in_dev->mr_qrv ?: + READ_ONCE(net->ipv4.sysctl_igmp_qrv); } in_dev_put(pmc->interface); kfree_pmc(pmc); @@ -1349,7 +1351,7 @@ static void igmp_group_added(struct ip_mc_list *im) if (in_dev->dead) return; - im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; + im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) { spin_lock_bh(&im->lock); igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY); @@ -1363,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im) * IN() to IN(A). */ if (im->sfmode == MCAST_EXCLUDE) - im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); igmp_ifc_event(in_dev); #endif @@ -1754,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev) in_dev->mr_qi = IGMP_QUERY_INTERVAL; in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL; - in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; + in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv); } #else static void ip_mc_reset(struct in_device *in_dev) @@ -1888,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #ifdef CONFIG_IP_MULTICAST if (psf->sf_oldin && !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { - psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); psf->sf_next = pmc->tomb; pmc->tomb = psf; rv = 1; @@ -1952,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* filter mode change */ pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -2131,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, #ifdef CONFIG_IP_MULTICAST /* else no filters; keep old mode for reports */ - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; From 40f4739bbd36796e7029a824dfa4509d08d2f776 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Jul 2022 14:20:57 +0300 Subject: [PATCH 0989/1056] net: pcs: xpcs: propagate xpcs_read error to xpcs_get_state_c37_sgmii [ Upstream commit 27161db0904ee48e59140aa8d0835939a666c1f1 ] While phylink_pcs_ops :: pcs_get_state does return void, xpcs_get_state() does check for a non-zero return code from xpcs_get_state_c37_sgmii() and prints that as a message to the kernel log. However, a non-zero return code from xpcs_read() is translated into "return false" (i.e. zero as int) and the I/O error is therefore not printed. Fix that. Fixes: b97b5331b8ab ("net: pcs: add C37 SGMII AN support for intel mGbE controller") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220720112057.3504398-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/pcs/pcs-xpcs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 7de631f5356fc8..fd4cbf8a55ad7a 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -890,7 +890,7 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs, */ ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS); if (ret < 0) - return false; + return ret; if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) { int speed_value; From 777d18e65d0901d5cb05c0497e359192279f0498 Mon Sep 17 00:00:00 2001 From: Liang He Date: Wed, 20 Jul 2022 21:10:03 +0800 Subject: [PATCH 0990/1056] net: sungem_phy: Add of_node_put() for reference returned by of_get_parent() [ Upstream commit ebbbe23fdf6070e31509638df3321688358cc211 ] In bcm5421_init(), we should call of_node_put() for the reference returned by of_get_parent() which has increased the refcount. Fixes: 3c326fe9cb7a ("[PATCH] ppc64: Add new PHY to sungem") Signed-off-by: Liang He Link: https://lore.kernel.org/r/20220720131003.1287426-1-windhl@126.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/sungem_phy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c index 291fa449993fb5..45f295403cb55f 100644 --- a/drivers/net/sungem_phy.c +++ b/drivers/net/sungem_phy.c @@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy) int can_low_power = 1; if (np == NULL || of_get_property(np, "no-autolowpower", NULL)) can_low_power = 0; + of_node_put(np); if (can_low_power) { /* Enable automatic low-power */ sungem_phy_write(phy, 0x1c, 0x9002); From 922ca9fd221ba4b74abeee94675dd04d1307b8dd Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:22 -0700 Subject: [PATCH 0991/1056] tcp: Fix a data-race around sysctl_tcp_min_tso_segs. [ Upstream commit e0bb4ab9dfddd872622239f49fb2bd403b70853b ] While reading sysctl_tcp_min_tso_segs, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 95bd09eb2750 ("tcp: TSO packets automatic sizing") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4389ad7e4c8ca1..51f31311fdb672 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1986,7 +1986,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) min_tso = ca_ops->min_tso_segs ? ca_ops->min_tso_segs(sk) : - sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); return min_t(u32, tso_segs, sk->sk_gso_max_segs); From bd07f2e70a4ba1b343e8e1efea4136411b1b114d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:24 -0700 Subject: [PATCH 0992/1056] tcp: Fix a data-race around sysctl_tcp_min_rtt_wlen. [ Upstream commit 1330ffacd05fc9ac4159d19286ce119e22450ed2 ] While reading sysctl_tcp_min_rtt_wlen, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: f672258391b4 ("tcp: track min RTT using windowed min-filter") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a5357ebfbcc0fa..b925c766f1d24e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3050,7 +3050,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag) { - u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; + u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ; struct tcp_sock *tp = tcp_sk(sk); if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) { From 6f446677ebb34665a631ad408155f6cf1bd02b09 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:25 -0700 Subject: [PATCH 0993/1056] tcp: Fix a data-race around sysctl_tcp_autocorking. [ Upstream commit 85225e6f0a76e6745bc841c9f25169c509b573d8 ] While reading sysctl_tcp_autocorking, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: f54b311142a9 ("tcp: auto corking") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1abdb871265596..7ba9059c263a84 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -694,7 +694,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, int size_goal) { return skb->len < size_goal && - sock_net(sk)->ipv4.sysctl_tcp_autocorking && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) && !tcp_rtx_queue_empty(sk) && refcount_read(&sk->sk_wmem_alloc) > skb->truesize; } From dac5644a823ef52d5abdeb3db88d49e98e4a0f29 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:26 -0700 Subject: [PATCH 0994/1056] tcp: Fix a data-race around sysctl_tcp_invalid_ratelimit. [ Upstream commit 2afdbe7b8de84c28e219073a6661080e1b3ded48 ] While reading sysctl_tcp_invalid_ratelimit, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 032ee4236954 ("tcp: helpers to mitigate ACK loops by rate-limiting out-of-window dupacks") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b925c766f1d24e..018be3f346e6a4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3574,7 +3574,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, if (*last_oow_ack_time) { s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); - if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) { + if (0 <= elapsed && + elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) { NET_INC_STATS(net, mib_idx); return true; /* rate-limited: don't send yet! */ } From 830582c16be1ce0c8fa489d8973871587da617c9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 21 Jul 2022 10:35:46 -0400 Subject: [PATCH 0995/1056] Documentation: fix sctp_wmem in ip-sysctl.rst [ Upstream commit aa709da0e032cee7c202047ecd75f437bb0126ed ] Since commit 1033990ac5b2 ("sctp: implement memory accounting on tx path"), SCTP has supported memory accounting on tx path where 'sctp_wmem' is used by sk_wmem_schedule(). So we should fix the description for this option in ip-sysctl.rst accordingly. v1->v2: - Improve the description as Marcelo suggested. Fixes: 1033990ac5b2 ("sctp: implement memory accounting on tx path") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- Documentation/networking/ip-sysctl.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index b8b67041f95521..ba0e8e6337c0a3 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2808,7 +2808,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max Default: 4K sctp_wmem - vector of 3 INTEGERs: min, default, max - Currently this tunable has no effect. + Only the first value ("min") is used, "default" and "max" are + ignored. + + min: Minimum size of send buffer that can be used by SCTP sockets. + It is guaranteed to each SCTP socket (but not association) even + under moderate memory pressure. + + Default: 4K addr_scope_policy - INTEGER Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00 From 8991687d3bcf6b031b5679d7bc6dfaad68e6dcc4 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 22 Jul 2022 11:16:27 +0200 Subject: [PATCH 0996/1056] macsec: fix NULL deref in macsec_add_rxsa [ Upstream commit f46040eeaf2e523a4096199fd93a11e794818009 ] Commit 48ef50fa866a added a test on tb_sa[MACSEC_SA_ATTR_PN], but nothing guarantees that it's not NULL at this point. The same code was added to macsec_add_txsa, but there it's not a problem because validate_add_txsa checks that the MACSEC_SA_ATTR_PN attribute is present. Note: it's not possible to reproduce with iproute, because iproute doesn't allow creating an SA without specifying the PN. Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)") Link: https://bugzilla.kernel.org/show_bug.cgi?id=208315 Reported-by: Frantisek Sumsal Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index e53b40359fd103..f72d4380374ddb 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1751,7 +1751,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) } pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; - if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { + if (tb_sa[MACSEC_SA_ATTR_PN] && + nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); rtnl_unlock(); From 6ad56d5c4f98ed160b1db463e0d56e6be7ab77a0 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 22 Jul 2022 11:16:28 +0200 Subject: [PATCH 0997/1056] macsec: fix error message in macsec_add_rxsa and _txsa [ Upstream commit 3240eac4ff20e51b87600dbd586ed814daf313db ] The expected length is MACSEC_SALT_LEN, not MACSEC_SA_ATTR_SALT. Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index f72d4380374ddb..9ede0d7cd0b5c2 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1768,7 +1768,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), - MACSEC_SA_ATTR_SALT); + MACSEC_SALT_LEN); rtnl_unlock(); return -EINVAL; } @@ -2010,7 +2010,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), - MACSEC_SA_ATTR_SALT); + MACSEC_SALT_LEN); rtnl_unlock(); return -EINVAL; } From a706a40d42f482ae3f03998a63dfdc8a8f3640c4 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 22 Jul 2022 11:16:29 +0200 Subject: [PATCH 0998/1056] macsec: limit replay window size with XPN [ Upstream commit b07a0e2044057f201d694ab474f5c42a02b6465b ] IEEE 802.1AEbw-2013 (section 10.7.8) specifies that the maximum value of the replay window is 2^30-1, to help with recovery of the upper bits of the PN. To avoid leaving the existing macsec device in an inconsistent state if this test fails during changelink, reuse the cleanup mechanism introduced for HW offload. This wasn't needed until now because macsec_changelink_common could not fail during changelink, as modifying the cipher suite was not allowed. Finally, this must happen after handling IFLA_MACSEC_CIPHER_SUITE so that secy->xpn is set. Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 9ede0d7cd0b5c2..1f2eb576533cc6 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -241,6 +241,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) #define DEFAULT_SEND_SCI true #define DEFAULT_ENCRYPT false #define DEFAULT_ENCODING_SA 0 +#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1)) static bool send_sci(const struct macsec_secy *secy) { @@ -3739,9 +3740,6 @@ static int macsec_changelink_common(struct net_device *dev, secy->operational = tx_sa && tx_sa->active; } - if (data[IFLA_MACSEC_WINDOW]) - secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]); - if (data[IFLA_MACSEC_ENCRYPT]) tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]); @@ -3787,6 +3785,16 @@ static int macsec_changelink_common(struct net_device *dev, } } + if (data[IFLA_MACSEC_WINDOW]) { + secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]); + + /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window + * for XPN cipher suites */ + if (secy->xpn && + secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW) + return -EINVAL; + } + return 0; } @@ -3816,7 +3824,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], ret = macsec_changelink_common(dev, data); if (ret) - return ret; + goto cleanup; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { From 2959a86a472f8828e0aeb6d558b64823f7af4a24 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 22 Jul 2022 11:16:30 +0200 Subject: [PATCH 0999/1056] macsec: always read MACSEC_SA_ATTR_PN as a u64 [ Upstream commit c630d1fe6219769049c87d1a6a0e9a6de55328a1 ] Currently, MACSEC_SA_ATTR_PN is handled inconsistently, sometimes as a u32, sometimes forced into a u64 without checking the actual length of the attribute. Instead, we can use nla_get_u64 everywhere, which will read up to 64 bits into a u64, capped by the actual length of the attribute coming from userspace. This fixes several issues: - the check in validate_add_rxsa doesn't work with 32-bit attributes - the checks in validate_add_txsa and validate_upd_sa incorrectly reject X << 32 (with X != 0) Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 1f2eb576533cc6..3e74dcc1f875ae 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1696,7 +1696,7 @@ static bool validate_add_rxsa(struct nlattr **attrs) return false; if (attrs[MACSEC_SA_ATTR_PN] && - *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0) + nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { @@ -1939,7 +1939,7 @@ static bool validate_add_txsa(struct nlattr **attrs) if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) return false; - if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0) + if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { @@ -2293,7 +2293,7 @@ static bool validate_upd_sa(struct nlattr **attrs) if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) return false; - if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0) + if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { From 5831ccf37a31cd09eeb5f6cb0b16775142a572f7 Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Fri, 22 Jul 2022 17:29:02 +0800 Subject: [PATCH 1000/1056] net: macsec: fix potential resource leak in macsec_add_rxsa() and macsec_add_txsa() [ Upstream commit c7b205fbbf3cffa374721bb7623f7aa8c46074f1 ] init_rx_sa() allocates relevant resource for rx_sa->stats and rx_sa-> key.tfm with alloc_percpu() and macsec_alloc_tfm(). When some error occurs after init_rx_sa() is called in macsec_add_rxsa(), the function released rx_sa with kfree() without releasing rx_sa->stats and rx_sa-> key.tfm, which will lead to a resource leak. We should call macsec_rxsa_put() instead of kfree() to decrease the ref count of rx_sa and release the relevant resource if the refcount is 0. The same bug exists in macsec_add_txsa() for tx_sa as well. This patch fixes the above two bugs. Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure") Signed-off-by: Jianglei Nie Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 3e74dcc1f875ae..354890948f8a12 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1842,7 +1842,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) return 0; cleanup: - kfree(rx_sa); + macsec_rxsa_put(rx_sa); rtnl_unlock(); return err; } @@ -2085,7 +2085,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) cleanup: secy->operational = was_operational; - kfree(tx_sa); + macsec_txsa_put(tx_sa); rtnl_unlock(); return err; } From b01b4f5b45ff251190be0816e0f83c527e6c179f Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 22 Jul 2022 17:06:35 +0000 Subject: [PATCH 1001/1056] net: mld: fix reference count leak in mld_{query | report}_work() [ Upstream commit 3e7d18b9dca388940a19cae30bfc1f76dccd8c28 ] mld_{query | report}_work() processes queued events. If there are too many events in the queue, it re-queue a work. And then, it returns without in6_dev_put(). But if queuing is failed, it should call in6_dev_put(), but it doesn't. So, a reference count leak would occur. THREAD0 THREAD1 mld_report_work() spin_lock_bh() if (!mod_delayed_work()) in6_dev_hold(); spin_unlock_bh() spin_lock_bh() schedule_delayed_work() spin_unlock_bh() Script to reproduce(by Hangbin Liu): ip netns add ns1 ip netns add ns2 ip netns exec ns1 sysctl -w net.ipv6.conf.all.force_mld_version=1 ip netns exec ns2 sysctl -w net.ipv6.conf.all.force_mld_version=1 ip -n ns1 link add veth0 type veth peer name veth0 netns ns2 ip -n ns1 link set veth0 up ip -n ns2 link set veth0 up for i in `seq 50`; do for j in `seq 100`; do ip -n ns1 addr add 2021:${i}::${j}/64 dev veth0 ip -n ns2 addr add 2022:${i}::${j}/64 dev veth0 done done modprobe -r veth ip -a netns del splat looks like: unregister_netdevice: waiting for veth0 to become free. Usage count = 2 leaked reference. ipv6_add_dev+0x324/0xec0 addrconf_notify+0x481/0xd10 raw_notifier_call_chain+0xe3/0x120 call_netdevice_notifiers+0x106/0x160 register_netdevice+0x114c/0x16b0 veth_newlink+0x48b/0xa50 [veth] rtnl_newlink+0x11a2/0x1a40 rtnetlink_rcv_msg+0x63f/0xc00 netlink_rcv_skb+0x1df/0x3e0 netlink_unicast+0x5de/0x850 netlink_sendmsg+0x6c9/0xa90 ____sys_sendmsg+0x76a/0x780 __sys_sendmsg+0x27c/0x340 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd Tested-by: Hangbin Liu Fixes: f185de28d9ae ("mld: add new workqueues for process mld events") Signed-off-by: Taehee Yoo Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/mcast.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 7f695c39d9a8c4..87c699d57b3669 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1522,7 +1522,6 @@ static void mld_query_work(struct work_struct *work) if (++cnt >= MLD_MAX_QUEUE) { rework = true; - schedule_delayed_work(&idev->mc_query_work, 0); break; } } @@ -1533,8 +1532,10 @@ static void mld_query_work(struct work_struct *work) __mld_query_work(skb); mutex_unlock(&idev->mc_lock); - if (!rework) - in6_dev_put(idev); + if (rework && queue_delayed_work(mld_wq, &idev->mc_query_work, 0)) + return; + + in6_dev_put(idev); } /* called with rcu_read_lock() */ @@ -1624,7 +1625,6 @@ static void mld_report_work(struct work_struct *work) if (++cnt >= MLD_MAX_QUEUE) { rework = true; - schedule_delayed_work(&idev->mc_report_work, 0); break; } } @@ -1635,8 +1635,10 @@ static void mld_report_work(struct work_struct *work) __mld_report_work(skb); mutex_unlock(&idev->mc_lock); - if (!rework) - in6_dev_put(idev); + if (rework && queue_delayed_work(mld_wq, &idev->mc_report_work, 0)) + return; + + in6_dev_put(idev); } static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type, From a610feb170bfb381c090620d30d7ee72505ce001 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Jul 2022 11:21:59 -0700 Subject: [PATCH 1002/1056] tcp: Fix data-races around sk_pacing_rate. [ Upstream commit 59bf6c65a09fff74215517aecffbbdcd67df76e3 ] While reading sysctl_tcp_pacing_(ss|ca)_ratio, they can be changed concurrently. Thus, we need to add READ_ONCE() to their readers. Fixes: 43e122b014c9 ("tcp: refine pacing rate determination") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 018be3f346e6a4..566745f527fe7b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -902,9 +902,9 @@ static void tcp_update_pacing_rate(struct sock *sk) * end of slow start and should slow down. */ if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2) - rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio; + rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio); else - rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio; + rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio); rate *= max(tcp_snd_cwnd(tp), tp->packets_out); From 618116a273b7a4df296296bd1b4057e41b77e9f1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Jul 2022 11:22:00 -0700 Subject: [PATCH 1003/1056] net: Fix data-races around sysctl_[rw]mem(_offset)?. [ Upstream commit 02739545951ad4c1215160db7fbf9b7a918d3c0b ] While reading these sysctl variables, they can be changed concurrently. Thus, we need to add READ_ONCE() to their readers. - .sysctl_rmem - .sysctl_rwmem - .sysctl_rmem_offset - .sysctl_wmem_offset - sysctl_tcp_rmem[1, 2] - sysctl_tcp_wmem[1, 2] - sysctl_decnet_rmem[1] - sysctl_decnet_wmem[1] - sysctl_tipc_rmem[1] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/sock.h | 8 ++++---- net/decnet/af_decnet.c | 4 ++-- net/ipv4/tcp.c | 6 +++--- net/ipv4/tcp_input.c | 13 +++++++------ net/ipv4/tcp_output.c | 2 +- net/mptcp/protocol.c | 6 +++--- net/tipc/socket.c | 2 +- 7 files changed, 21 insertions(+), 20 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 96f51d4b164997..819c53965ef35f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2765,18 +2765,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) { /* Does this proto have per netns sysctl_wmem ? */ if (proto->sysctl_wmem_offset) - return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset); + return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset)); - return *proto->sysctl_wmem; + return READ_ONCE(*proto->sysctl_wmem); } static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) { /* Does this proto have per netns sysctl_rmem ? */ if (proto->sysctl_rmem_offset) - return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset); + return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset)); - return *proto->sysctl_rmem; + return READ_ONCE(*proto->sysctl_rmem); } /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index dc92a67baea394..7d542eb4617295 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -480,8 +480,8 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf sk->sk_family = PF_DECnet; sk->sk_protocol = 0; sk->sk_allocation = gfp; - sk->sk_sndbuf = sysctl_decnet_wmem[1]; - sk->sk_rcvbuf = sysctl_decnet_rmem[1]; + sk->sk_sndbuf = READ_ONCE(sysctl_decnet_wmem[1]); + sk->sk_rcvbuf = READ_ONCE(sysctl_decnet_rmem[1]); /* Initialization of DECnet Session Control Port */ scp = DN_SK(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7ba9059c263a84..2097eeaf30a677 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -458,8 +458,8 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]); - WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); + WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1])); + WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1])); sk_sockets_allocated_inc(sk); sk->sk_route_forced_caps = NETIF_F_GSO; @@ -1722,7 +1722,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val) if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) cap = sk->sk_rcvbuf >> 1; else - cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1; + cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; val = min(val, cap); WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 566745f527fe7b..e007bdc20e82ff 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -426,7 +426,7 @@ static void tcp_sndbuf_expand(struct sock *sk) if (sk->sk_sndbuf < sndmem) WRITE_ONCE(sk->sk_sndbuf, - min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2])); + min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2]))); } /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) @@ -461,7 +461,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1; - int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; + int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1; while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) @@ -566,16 +566,17 @@ static void tcp_clamp_window(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); struct net *net = sock_net(sk); + int rmem2; icsk->icsk_ack.quick = 0; + rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]); - if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] && + if (sk->sk_rcvbuf < rmem2 && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && !tcp_under_memory_pressure(sk) && sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { WRITE_ONCE(sk->sk_rcvbuf, - min(atomic_read(&sk->sk_rmem_alloc), - net->ipv4.sysctl_tcp_rmem[2])); + min(atomic_read(&sk->sk_rmem_alloc), rmem2)); } if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss); @@ -737,7 +738,7 @@ void tcp_rcv_space_adjust(struct sock *sk) do_div(rcvwin, tp->advmss); rcvbuf = min_t(u64, rcvwin * rcvmem, - sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); if (rcvbuf > sk->sk_rcvbuf) { WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 51f31311fdb672..9c9a0f7a3deea2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -238,7 +238,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, *rcv_wscale = 0; if (wscale_ok) { /* Set window scaling on max possible window */ - space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); space = max_t(u32, space, sysctl_rmem_max); space = min_t(u32, space, *window_clamp); *rcv_wscale = clamp_t(int, ilog2(space) - 15, diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 01ede89e3c46cb..7f96e0c42a0905 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1899,7 +1899,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) do_div(rcvwin, advmss); rcvbuf = min_t(u64, rcvwin * rcvmem, - sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); if (rcvbuf > sk->sk_rcvbuf) { u32 window_clamp; @@ -2532,8 +2532,8 @@ static int mptcp_init_sock(struct sock *sk) icsk->icsk_ca_ops = NULL; sk_sockets_allocated_inc(sk); - sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; - sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1]; + sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); + sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]); return 0; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 43509c7e90fc28..f1c3b8eb4b3d33 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -517,7 +517,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, timer_setup(&sk->sk_timer, tipc_sk_timeout, 0); sk->sk_shutdown = 0; sk->sk_backlog_rcv = tipc_sk_backlog_rcv; - sk->sk_rcvbuf = sysctl_tipc_rmem[1]; + sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]); sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; sk->sk_destruct = tipc_sock_destruct; From 6842c94de9d5b25e0fbfa3402f4c3d6a382f7fae Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Jul 2022 11:22:01 -0700 Subject: [PATCH 1004/1056] tcp: Fix a data-race around sysctl_tcp_comp_sack_delay_ns. [ Upstream commit 4866b2b0f7672b6d760c4b8ece6fb56f965dcc8a ] While reading sysctl_tcp_comp_sack_delay_ns, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 6d82aa242092 ("tcp: add tcp_comp_sack_delay_ns sysctl") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e007bdc20e82ff..486ca1d5b436e8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5512,7 +5512,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) if (tp->srtt_us && tp->srtt_us < rtt) rtt = tp->srtt_us; - delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns, + delay = min_t(unsigned long, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns), rtt * (NSEC_PER_USEC >> 3)/20); sock_hold(sk); hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay), From 34c9977b4dcaa22a86dab752284329f746925613 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Jul 2022 11:22:02 -0700 Subject: [PATCH 1005/1056] tcp: Fix a data-race around sysctl_tcp_comp_sack_slack_ns. [ Upstream commit 22396941a7f343d704738360f9ef0e6576489d43 ] While reading sysctl_tcp_comp_sack_slack_ns, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: a70437cc09a1 ("tcp: add hrtimer slack to sack compression") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 486ca1d5b436e8..7b593865b4ae9a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5517,7 +5517,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) rtt * (NSEC_PER_USEC >> 3)/20); sock_hold(sk); hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay), - sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns), HRTIMER_MODE_REL_PINNED_SOFT); } From 4cc070e0ef2d212780028b62d8eb286c098c3e9c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Jul 2022 11:22:03 -0700 Subject: [PATCH 1006/1056] tcp: Fix a data-race around sysctl_tcp_comp_sack_nr. [ Upstream commit 79f55473bfc8ac51bd6572929a679eeb4da22251 ] While reading sysctl_tcp_comp_sack_nr, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 9c21d2fc41c0 ("tcp: add tcp_comp_sack_nr sysctl") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7b593865b4ae9a..a33e6aa42a4c51 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5491,7 +5491,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) } if (!tcp_is_sack(tp) || - tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr) + tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)) goto send_now; if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) { From eaccca7a0bb8770d2683e817ae6d72dad2b3d5d5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Jul 2022 11:22:04 -0700 Subject: [PATCH 1007/1056] tcp: Fix data-races around sysctl_tcp_reflect_tos. [ Upstream commit 870e3a634b6a6cb1543b359007aca73fe6a03ac5 ] While reading sysctl_tcp_reflect_tos, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: ac8f1710c12b ("tcp: reflect tos value received in SYN to the socket") Signed-off-by: Kuniyuki Iwashima Acked-by: Wei Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/tcp_ipv6.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fba02cf6b4686f..dae0776c494876 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1004,7 +1004,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); - tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? + tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | (inet_sk(sk)->tos & INET_ECN_MASK) : inet_sk(sk)->tos; @@ -1590,7 +1590,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, /* Set ToS of the new socket based upon the value of incoming SYN. * ECT bits are set later in tcp_init_transfer(). */ - if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; if (!dst) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index beaa0c2ada235a..8ab39cf57d435f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -542,7 +542,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); - tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? + tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | (np->tclass & INET_ECN_MASK) : np->tclass; @@ -1364,7 +1364,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * /* Set ToS of the new socket based upon the value of incoming SYN. * ECT bits are set later in tcp_init_transfer(). */ - if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; /* Clone native IPv6 options from listening socket (if any) From 4685f16b3a5d16bc515524eeb0ed8b692a929854 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Jul 2022 11:22:05 -0700 Subject: [PATCH 1008/1056] ipv4: Fix data-races around sysctl_fib_notify_on_flag_change. [ Upstream commit 96b9bd8c6d125490f9adfb57d387ef81a55a103e ] While reading sysctl_fib_notify_on_flag_change, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 680aea08e78c ("net: ipv4: Emit notification when fib hardware flags are changed") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/fib_trie.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index a9cd9c2bd84eb0..19c6e7b93d3d8c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1037,6 +1037,7 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri) void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri) { + u8 fib_notify_on_flag_change; struct fib_alias *fa_match; struct sk_buff *skb; int err; @@ -1058,14 +1059,16 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri) WRITE_ONCE(fa_match->offload, fri->offload); WRITE_ONCE(fa_match->trap, fri->trap); + fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change); + /* 2 means send notifications only if offload_failed was changed. */ - if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 && + if (fib_notify_on_flag_change == 2 && READ_ONCE(fa_match->offload_failed) == fri->offload_failed) goto out; WRITE_ONCE(fa_match->offload_failed, fri->offload_failed); - if (!net->ipv4.sysctl_fib_notify_on_flag_change) + if (!fib_notify_on_flag_change) goto out; skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC); From 186fcdb68f42ef13733cef5d1be4fdfc07b8935d Mon Sep 17 00:00:00 2001 From: Michal Maloszewski Date: Fri, 22 Jul 2022 10:54:01 -0700 Subject: [PATCH 1009/1056] i40e: Fix interface init with MSI interrupts (no MSI-X) [ Upstream commit 5fcbb711024aac6d4db385623e6f2fdf019f7782 ] Fix the inability to bring an interface up on a setup with only MSI interrupts enabled (no MSI-X). Solution is to add a default number of QPs = 1. This is enough, since without MSI-X support driver enables only a basic feature set. Fixes: bc6d33c8d93f ("i40e: Fix the number of queues available to be mapped for use") Signed-off-by: Dawid Lukwinski Signed-off-by: Michal Maloszewski Tested-by: Dave Switzer Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20220722175401.112572-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index c801b128e5b2bb..b07d55c99317ee 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1908,11 +1908,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, * non-zero req_queue_pairs says that user requested a new * queue count via ethtool's set_channels, so use this * value for queues distribution across traffic classes + * We need at least one queue pair for the interface + * to be usable as we see in else statement. */ if (vsi->req_queue_pairs > 0) vsi->num_queue_pairs = vsi->req_queue_pairs; else if (pf->flags & I40E_FLAG_MSIX_ENABLED) vsi->num_queue_pairs = pf->num_lan_msix; + else + vsi->num_queue_pairs = 1; } /* Number of queues per enabled TC */ From 3688939cd3e84b70875fae350400cbc52024567d Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Sat, 23 Jul 2022 09:58:09 +0800 Subject: [PATCH 1010/1056] sctp: fix sleep in atomic context bug in timer handlers [ Upstream commit b89fc26f741d9f9efb51cba3e9b241cf1380ec5a ] There are sleep in atomic context bugs in timer handlers of sctp such as sctp_generate_t3_rtx_event(), sctp_generate_probe_event(), sctp_generate_t1_init_event(), sctp_generate_timeout_event(), sctp_generate_t3_rtx_event() and so on. The root cause is sctp_sched_prio_init_sid() with GFP_KERNEL parameter that may sleep could be called by different timer handlers which is in interrupt context. One of the call paths that could trigger bug is shown below: (interrupt context) sctp_generate_probe_event sctp_do_sm sctp_side_effects sctp_cmd_interpreter sctp_outq_teardown sctp_outq_init sctp_sched_set_sched n->init_sid(..,GFP_KERNEL) sctp_sched_prio_init_sid //may sleep This patch changes gfp_t parameter of init_sid in sctp_sched_set_sched() from GFP_KERNEL to GFP_ATOMIC in order to prevent sleep in atomic context bugs. Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") Signed-off-by: Duoming Zhou Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/20220723015809.11553-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sctp/stream_sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 99e5f69fbb7424..a2e1d34f52c5b0 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc, if (!SCTP_SO(&asoc->stream, i)->ext) continue; - ret = n->init_sid(&asoc->stream, i, GFP_KERNEL); + ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC); if (ret) goto err; } From be5cd347ba22fbeacd26e35d8d8386b8e95bfe0f Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Sun, 24 Jul 2022 13:51:13 +0530 Subject: [PATCH 1011/1056] octeontx2-pf: cn10k: Fix egress ratelimit configuration [ Upstream commit b354eaeec8637d87003945439209251d76a2bb95 ] NIX_AF_TLXX_PIR/CIR register format has changed from OcteonTx2 to CN10K. CN10K supports larger burst size. Fix burst exponent and burst mantissa configuration for CN10K. Also fixed 'maxrate' from u32 to u64 since 'police.rate_bytes_ps' passed by stack is also u64. Fixes: e638a83f167e ("octeontx2-pf: TC_MATCHALL egress ratelimiting offload") Signed-off-by: Sunil Goutham Signed-off-by: Subbaraya Sundeep Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/marvell/octeontx2/nic/otx2_tc.c | 76 ++++++++++++++----- 1 file changed, 55 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 5bcefc06ce5c46..75388a65f349eb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -28,6 +28,9 @@ #define MAX_RATE_EXPONENT 0x0FULL #define MAX_RATE_MANTISSA 0xFFULL +#define CN10K_MAX_BURST_MANTISSA 0x7FFFULL +#define CN10K_MAX_BURST_SIZE 8453888ULL + /* Bitfields in NIX_TLX_PIR register */ #define TLX_RATE_MANTISSA GENMASK_ULL(8, 1) #define TLX_RATE_EXPONENT GENMASK_ULL(12, 9) @@ -35,6 +38,9 @@ #define TLX_BURST_MANTISSA GENMASK_ULL(36, 29) #define TLX_BURST_EXPONENT GENMASK_ULL(40, 37) +#define CN10K_TLX_BURST_MANTISSA GENMASK_ULL(43, 29) +#define CN10K_TLX_BURST_EXPONENT GENMASK_ULL(47, 44) + struct otx2_tc_flow_stats { u64 bytes; u64 pkts; @@ -77,33 +83,42 @@ int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic) } EXPORT_SYMBOL(otx2_tc_alloc_ent_bitmap); -static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp, - u32 *burst_mantissa) +static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst, + u32 *burst_exp, u32 *burst_mantissa) { + int max_burst, max_mantissa; unsigned int tmp; + if (is_dev_otx2(nic->pdev)) { + max_burst = MAX_BURST_SIZE; + max_mantissa = MAX_BURST_MANTISSA; + } else { + max_burst = CN10K_MAX_BURST_SIZE; + max_mantissa = CN10K_MAX_BURST_MANTISSA; + } + /* Burst is calculated as * ((256 + BURST_MANTISSA) << (1 + BURST_EXPONENT)) / 256 * Max supported burst size is 130,816 bytes. */ - burst = min_t(u32, burst, MAX_BURST_SIZE); + burst = min_t(u32, burst, max_burst); if (burst) { *burst_exp = ilog2(burst) ? ilog2(burst) - 1 : 0; tmp = burst - rounddown_pow_of_two(burst); - if (burst < MAX_BURST_MANTISSA) + if (burst < max_mantissa) *burst_mantissa = tmp * 2; else *burst_mantissa = tmp / (1ULL << (*burst_exp - 7)); } else { *burst_exp = MAX_BURST_EXPONENT; - *burst_mantissa = MAX_BURST_MANTISSA; + *burst_mantissa = max_mantissa; } } -static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp, +static void otx2_get_egress_rate_cfg(u64 maxrate, u32 *exp, u32 *mantissa, u32 *div_exp) { - unsigned int tmp; + u64 tmp; /* Rate calculation by hardware * @@ -132,21 +147,44 @@ static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp, } } -static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 maxrate) +static u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic, + u64 maxrate, u32 burst) { - struct otx2_hw *hw = &nic->hw; - struct nix_txschq_config *req; u32 burst_exp, burst_mantissa; u32 exp, mantissa, div_exp; + u64 regval = 0; + + /* Get exponent and mantissa values from the desired rate */ + otx2_get_egress_burst_cfg(nic, burst, &burst_exp, &burst_mantissa); + otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp); + + if (is_dev_otx2(nic->pdev)) { + regval = FIELD_PREP(TLX_BURST_EXPONENT, (u64)burst_exp) | + FIELD_PREP(TLX_BURST_MANTISSA, (u64)burst_mantissa) | + FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) | + FIELD_PREP(TLX_RATE_EXPONENT, exp) | + FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0); + } else { + regval = FIELD_PREP(CN10K_TLX_BURST_EXPONENT, (u64)burst_exp) | + FIELD_PREP(CN10K_TLX_BURST_MANTISSA, (u64)burst_mantissa) | + FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) | + FIELD_PREP(TLX_RATE_EXPONENT, exp) | + FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0); + } + + return regval; +} + +static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, + u32 burst, u64 maxrate) +{ + struct otx2_hw *hw = &nic->hw; + struct nix_txschq_config *req; int txschq, err; /* All SQs share the same TL4, so pick the first scheduler */ txschq = hw->txschq_list[NIX_TXSCH_LVL_TL4][0]; - /* Get exponent and mantissa values from the desired rate */ - otx2_get_egress_burst_cfg(burst, &burst_exp, &burst_mantissa); - otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp); - mutex_lock(&nic->mbox.lock); req = otx2_mbox_alloc_msg_nix_txschq_cfg(&nic->mbox); if (!req) { @@ -157,11 +195,7 @@ static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 ma req->lvl = NIX_TXSCH_LVL_TL4; req->num_regs = 1; req->reg[0] = NIX_AF_TL4X_PIR(txschq); - req->regval[0] = FIELD_PREP(TLX_BURST_EXPONENT, burst_exp) | - FIELD_PREP(TLX_BURST_MANTISSA, burst_mantissa) | - FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) | - FIELD_PREP(TLX_RATE_EXPONENT, exp) | - FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0); + req->regval[0] = otx2_get_txschq_rate_regval(nic, maxrate, burst); err = otx2_sync_mbox_msg(&nic->mbox); mutex_unlock(&nic->mbox.lock); @@ -196,7 +230,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic, struct netlink_ext_ack *extack = cls->common.extack; struct flow_action *actions = &cls->rule->action; struct flow_action_entry *entry; - u32 rate; + u64 rate; int err; err = otx2_tc_validate_flow(nic, actions, extack); @@ -218,7 +252,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic, } /* Convert bytes per second to Mbps */ rate = entry->police.rate_bytes_ps * 8; - rate = max_t(u32, rate / 1000000, 1); + rate = max_t(u64, rate / 1000000, 1); err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate); if (err) return err; From 91c11008aab0282957b8b8ccb0707d90e74cc3b9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Jul 2022 12:42:06 +0200 Subject: [PATCH 1012/1056] netfilter: nf_queue: do not allow packet truncation below transport header offset [ Upstream commit 99a63d36cb3ed5ca3aa6fcb64cffbeaf3b0fb164 ] Domingo Dirutigliano and Nicola Guerrera report kernel panic when sending nf_queue verdict with 1-byte nfta_payload attribute. The IP/IPv6 stack pulls the IP(v6) header from the packet after the input hook. If user truncates the packet below the header size, this skb_pull() will result in a malformed skb (skb->len < 0). Fixes: 7af4cc3fa158 ("[NETFILTER]: Add "nfnetlink_queue" netfilter queue handler over nfnetlink") Reported-by: Domingo Dirutigliano Signed-off-by: Florian Westphal Reviewed-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nfnetlink_queue.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 8787d0613ad83e..5329ebf19a18b5 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -836,11 +836,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) } static int -nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) +nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff) { struct sk_buff *nskb; if (diff < 0) { + unsigned int min_len = skb_transport_offset(e->skb); + + if (data_len < min_len) + return -EINVAL; + if (pskb_trim(e->skb, data_len)) return -ENOMEM; } else if (diff > 0) { From 871168abe6d831b7c784bbf19adfa6fae50b14b1 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 25 Jul 2022 15:21:59 +0800 Subject: [PATCH 1013/1056] virtio-net: fix the race between refill work and close [ Upstream commit 5a159128faff151b7fe5f4eb0f310b1e0a2d56bf ] We try using cancel_delayed_work_sync() to prevent the work from enabling NAPI. This is insufficient since we don't disable the source of the refill work scheduling. This means an NAPI poll callback after cancel_delayed_work_sync() can schedule the refill work then can re-enable the NAPI that leads to use-after-free [1]. Since the work can enable NAPI, we can't simply disable NAPI before calling cancel_delayed_work_sync(). So fix this by introducing a dedicated boolean to control whether or not the work could be scheduled from NAPI. [1] ================================================================== BUG: KASAN: use-after-free in refill_work+0x43/0xd4 Read of size 2 at addr ffff88810562c92e by task kworker/2:1/42 CPU: 2 PID: 42 Comm: kworker/2:1 Not tainted 5.19.0-rc1+ #480 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Workqueue: events refill_work Call Trace: dump_stack_lvl+0x34/0x44 print_report.cold+0xbb/0x6ac ? _printk+0xad/0xde ? refill_work+0x43/0xd4 kasan_report+0xa8/0x130 ? refill_work+0x43/0xd4 refill_work+0x43/0xd4 process_one_work+0x43d/0x780 worker_thread+0x2a0/0x6f0 ? process_one_work+0x780/0x780 kthread+0x167/0x1a0 ? kthread_exit+0x50/0x50 ret_from_fork+0x22/0x30 ... Fixes: b2baed69e605c ("virtio_net: set/cancel work on ndo_open/ndo_stop") Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/virtio_net.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 318c681ad63ee2..53cefad2a79d25 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -213,9 +213,15 @@ struct virtnet_info { /* Packet virtio header size */ u8 hdr_len; - /* Work struct for refilling if we run low on memory. */ + /* Work struct for delayed refilling if we run low on memory. */ struct delayed_work refill; + /* Is delayed refill enabled? */ + bool refill_enabled; + + /* The lock to synchronize the access to refill_enabled */ + spinlock_t refill_lock; + /* Work struct for config space updates */ struct work_struct config_work; @@ -319,6 +325,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) return p; } +static void enable_delayed_refill(struct virtnet_info *vi) +{ + spin_lock_bh(&vi->refill_lock); + vi->refill_enabled = true; + spin_unlock_bh(&vi->refill_lock); +} + +static void disable_delayed_refill(struct virtnet_info *vi) +{ + spin_lock_bh(&vi->refill_lock); + vi->refill_enabled = false; + spin_unlock_bh(&vi->refill_lock); +} + static void virtqueue_napi_schedule(struct napi_struct *napi, struct virtqueue *vq) { @@ -1454,8 +1474,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget, } if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { - if (!try_fill_recv(vi, rq, GFP_ATOMIC)) - schedule_delayed_work(&vi->refill, 0); + if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { + spin_lock(&vi->refill_lock); + if (vi->refill_enabled) + schedule_delayed_work(&vi->refill, 0); + spin_unlock(&vi->refill_lock); + } } u64_stats_update_begin(&rq->stats.syncp); @@ -1578,6 +1602,8 @@ static int virtnet_open(struct net_device *dev) struct virtnet_info *vi = netdev_priv(dev); int i, err; + enable_delayed_refill(vi); + for (i = 0; i < vi->max_queue_pairs; i++) { if (i < vi->curr_queue_pairs) /* Make sure we have some buffers: if oom use wq. */ @@ -1958,6 +1984,8 @@ static int virtnet_close(struct net_device *dev) struct virtnet_info *vi = netdev_priv(dev); int i; + /* Make sure NAPI doesn't schedule refill work */ + disable_delayed_refill(vi); /* Make sure refill_work doesn't re-enable napi! */ cancel_delayed_work_sync(&vi->refill); @@ -2455,6 +2483,8 @@ static int virtnet_restore_up(struct virtio_device *vdev) virtio_device_ready(vdev); + enable_delayed_refill(vi); + if (netif_running(vi->dev)) { err = virtnet_open(vi->dev); if (err) @@ -3162,6 +3192,7 @@ static int virtnet_probe(struct virtio_device *vdev) vdev->priv = vi; INIT_WORK(&vi->config_work, virtnet_config_changed_work); + spin_lock_init(&vi->refill_lock); /* If we can receive ANY GSO packets, we must allocate large ones. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || From fe0e602f050228940443d00a611a23a3a52f2730 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sun, 24 Jul 2022 14:00:12 +0800 Subject: [PATCH 1014/1056] perf symbol: Correct address for bss symbols [ Upstream commit 2d86612aacb7805f72873691a2644d7279ed0630 ] When using 'perf mem' and 'perf c2c', an issue is observed that tool reports the wrong offset for global data symbols. This is a common issue on both x86 and Arm64 platforms. Let's see an example, for a test program, below is the disassembly for its .bss section which is dumped with objdump: ... Disassembly of section .bss: 0000000000004040 : ... 0000000000004080 : ... 00000000000040c0 : ... 0000000000004100 : ... First we used 'perf mem record' to run the test program and then used 'perf --debug verbose=4 mem report' to observe what's the symbol info for 'buf1' and 'buf2' structures. # ./perf mem record -e ldlat-loads,ldlat-stores -- false_sharing.exe 8 # ./perf --debug verbose=4 mem report ... dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 sh_addr: 0x4040 sh_offset: 0x3028 symbol__new: buf2 0x30a8-0x30e8 ... dso__load_sym_internal: adjusting symbol: st_value: 0x4080 sh_addr: 0x4040 sh_offset: 0x3028 symbol__new: buf1 0x3068-0x30a8 ... The perf tool relies on libelf to parse symbols, in executable and shared object files, 'st_value' holds a virtual address; 'sh_addr' is the address at which section's first byte should reside in memory, and 'sh_offset' is the byte offset from the beginning of the file to the first byte in the section. The perf tool uses below formula to convert a symbol's memory address to a file address: file_address = st_value - sh_addr + sh_offset ^ ` Memory address We can see the final adjusted address ranges for buf1 and buf2 are [0x30a8-0x30e8) and [0x3068-0x30a8) respectively, apparently this is incorrect, in the code, the structure for 'buf1' and 'buf2' specifies compiler attribute with 64-byte alignment. The problem happens for 'sh_offset', libelf returns it as 0x3028 which is not 64-byte aligned, combining with disassembly, it's likely libelf doesn't respect the alignment for .bss section, therefore, it doesn't return the aligned value for 'sh_offset'. Suggested by Fangrui Song, ELF file contains program header which contains PT_LOAD segments, the fields p_vaddr and p_offset in PT_LOAD segments contain the execution info. A better choice for converting memory address to file address is using the formula: file_address = st_value - p_vaddr + p_offset This patch introduces elf_read_program_header() which returns the program header based on the passed 'st_value', then it uses the formula above to calculate the symbol file address; and the debugging log is updated respectively. After applying the change: # ./perf --debug verbose=4 mem report ... dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 p_vaddr: 0x3d28 p_offset: 0x2d28 symbol__new: buf2 0x30c0-0x3100 ... dso__load_sym_internal: adjusting symbol: st_value: 0x4080 p_vaddr: 0x3d28 p_offset: 0x2d28 symbol__new: buf1 0x3080-0x30c0 ... Fixes: f17e04afaff84b5c ("perf report: Fix ELF symbol parsing") Reported-by: Chang Rui Suggested-by: Fangrui Song Signed-off-by: Leo Yan Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20220724060013.171050-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/symbol-elf.c | 45 ++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index ecd377938eea8e..ef6ced5c5746af 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -233,6 +233,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, return NULL; } +static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr) +{ + size_t i, phdrnum; + u64 sz; + + if (elf_getphdrnum(elf, &phdrnum)) + return -1; + + for (i = 0; i < phdrnum; i++) { + if (gelf_getphdr(elf, i, phdr) == NULL) + return -1; + + if (phdr->p_type != PT_LOAD) + continue; + + sz = max(phdr->p_memsz, phdr->p_filesz); + if (!sz) + continue; + + if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz)) + return 0; + } + + /* Not found any valid program header */ + return -1; +} + static bool want_demangle(bool is_kernel_sym) { return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; @@ -1209,6 +1236,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, sym.st_value); used_opd = true; } + /* * When loading symbols in a data mapping, ABS symbols (which * has a value of SHN_ABS in its st_shndx) failed at @@ -1262,11 +1290,20 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, goto out_elf_end; } else if ((used_opd && runtime_ss->adjust_symbols) || (!used_opd && syms_ss->adjust_symbols)) { + GElf_Phdr phdr; + + if (elf_read_program_header(syms_ss->elf, + (u64)sym.st_value, &phdr)) { + pr_warning("%s: failed to find program header for " + "symbol: %s st_value: %#" PRIx64 "\n", + __func__, elf_name, (u64)sym.st_value); + continue; + } pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " - "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, - (u64)sym.st_value, (u64)shdr.sh_addr, - (u64)shdr.sh_offset); - sym.st_value -= shdr.sh_addr - shdr.sh_offset; + "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n", + __func__, (u64)sym.st_value, (u64)phdr.p_vaddr, + (u64)phdr.p_offset); + sym.st_value -= phdr.p_vaddr - phdr.p_offset; } demangled = demangle_sym(dso, kmodule, elf_name); From f7c2a9c5435abbe04c239a1902d83c0c1c9584c3 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Tue, 26 Jul 2022 08:45:04 +0200 Subject: [PATCH 1015/1056] sfc: disable softirqs for ptp TX [ Upstream commit 67c3b611d92fc238c43734878bc3e232ab570c79 ] Sending a PTP packet can imply to use the normal TX driver datapath but invoked from the driver's ptp worker. The kernel generic TX code disables softirqs and preemption before calling specific driver TX code, but the ptp worker does not. Although current ptp driver functionality does not require it, there are several reasons for doing so: 1) The invoked code is always executed with softirqs disabled for non PTP packets. 2) Better if a ptp packet transmission is not interrupted by softirq handling which could lead to high latencies. 3) netdev_xmit_more used by the TX code requires preemption to be disabled. Indeed a solution for dealing with kernel preemption state based on static kernel configuration is not possible since the introduction of dynamic preemption level configuration at boot time using the static calls functionality. Fixes: f79c957a0b537 ("drivers: net: sfc: use netdev_xmit_more helper") Signed-off-by: Alejandro Lucero Link: https://lore.kernel.org/r/20220726064504.49613-1-alejandro.lucero-palau@amd.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/sfc/ptp.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 725b0f38813a96..a2b4e3befa5918 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1100,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb) tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type); if (tx_queue && tx_queue->timestamping) { + /* This code invokes normal driver TX code which is always + * protected from softirqs when called from generic TX code, + * which in turn disables preemption. Look at __dev_queue_xmit + * which uses rcu_read_lock_bh disabling preemption for RCU + * plus disabling softirqs. We do not need RCU reader + * protection here. + * + * Although it is theoretically safe for current PTP TX/RX code + * running without disabling softirqs, there are three good + * reasond for doing so: + * + * 1) The code invoked is mainly implemented for non-PTP + * packets and it is always executed with softirqs + * disabled. + * 2) This being a single PTP packet, better to not + * interrupt its processing by softirqs which can lead + * to high latencies. + * 3) netdev_xmit_more checks preemption is disabled and + * triggers a BUG_ON if not. + */ + local_bh_disable(); efx_enqueue_skb(tx_queue, skb); + local_bh_enable(); } else { WARN_ONCE(1, "PTP channel has no timestamped tx queue\n"); dev_kfree_skb_any(skb); From e796e1fe20ecaf6da419ef6a5841ba181bba7a0c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 25 Jul 2022 18:11:06 -0400 Subject: [PATCH 1016/1056] sctp: leave the err path free in sctp_stream_init to sctp_stream_free [ Upstream commit 181d8d2066c000ba0a0e6940a7ad80f1a0e68e9d ] A NULL pointer dereference was reported by Wei Chen: BUG: kernel NULL pointer dereference, address: 0000000000000000 RIP: 0010:__list_del_entry_valid+0x26/0x80 Call Trace: sctp_sched_dequeue_common+0x1c/0x90 sctp_sched_prio_dequeue+0x67/0x80 __sctp_outq_teardown+0x299/0x380 sctp_outq_free+0x15/0x20 sctp_association_free+0xc3/0x440 sctp_do_sm+0x1ca7/0x2210 sctp_assoc_bh_rcv+0x1f6/0x340 This happens when calling sctp_sendmsg without connecting to server first. In this case, a data chunk already queues up in send queue of client side when processing the INIT_ACK from server in sctp_process_init() where it calls sctp_stream_init() to alloc stream_in. If it fails to alloc stream_in all stream_out will be freed in sctp_stream_init's err path. Then in the asoc freeing it will crash when dequeuing this data chunk as stream_out is missing. As we can't free stream out before dequeuing all data from send queue, and this patch is to fix it by moving the err path stream_out/in freeing in sctp_stream_init() to sctp_stream_free() which is eventually called when freeing the asoc in sctp_association_free(). This fix also makes the code in sctp_process_init() more clear. Note that in sctp_association_init() when it fails in sctp_stream_init(), sctp_association_free() will not be called, and in that case it should go to 'stream_free' err path to free stream instead of 'fail_init'. Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") Reported-by: Wei Chen Signed-off-by: Xin Long Link: https://lore.kernel.org/r/831a3dc100c4908ff76e5bcc363be97f2778bc0b.1658787066.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sctp/associola.c | 5 ++--- net/sctp/stream.c | 19 +++---------------- 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index be29da09cc7ab0..3460abceba443b 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -229,9 +229,8 @@ static struct sctp_association *sctp_association_init( if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; - if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, - 0, gfp)) - goto fail_init; + if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp)) + goto stream_free; /* Initialize default path MTU. */ asoc->pathmtu = sp->pathmtu; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 6dc95dcc0ff4f0..ef9fceadef8d5a 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, ret = sctp_stream_alloc_out(stream, outcnt, gfp); if (ret) - goto out_err; + return ret; for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; @@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, handle_in: sctp_stream_interleave_init(stream); if (!incnt) - goto out; - - ret = sctp_stream_alloc_in(stream, incnt, gfp); - if (ret) - goto in_err; - - goto out; + return 0; -in_err: - sched->free(stream); - genradix_free(&stream->in); -out_err: - genradix_free(&stream->out); - stream->outcnt = 0; -out: - return ret; + return sctp_stream_alloc_in(stream, incnt, gfp); } int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) From 350fcb5e7bbb7d808ee0bc54f9c66aefc70f597a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 31 Jul 2022 12:05:51 +0200 Subject: [PATCH 1017/1056] ARM: crypto: comment out gcc warning that breaks clang builds The gcc build warning prevents all clang-built kernels from working properly, so comment it out to fix the build. This is a -stable kernel only patch for now, it will be resolved differently in mainline releases in the future. Cc: "Jason A. Donenfeld" Cc: "Justin M. Forbes" Cc: Ard Biesheuvel Acked-by: Arnd Bergmann Cc: Nicolas Pitre Cc: Nathan Chancellor Cc: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- arch/arm/lib/xor-neon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c index b99dd8e1c93f17..7ba6cf82616264 100644 --- a/arch/arm/lib/xor-neon.c +++ b/arch/arm/lib/xor-neon.c @@ -26,8 +26,9 @@ MODULE_LICENSE("GPL"); * While older versions of GCC do not generate incorrect code, they fail to * recognize the parallel nature of these functions, and emit plain ARM code, * which is known to be slower than the optimized ARM code in asm-arm/xor.h. + * + * #warning This code requires at least version 4.6 of GCC */ -#warning This code requires at least version 4.6 of GCC #endif #pragma GCC diagnostic ignored "-Wunused-variable" From 51a772c34ea49173642ca3e06f1c5a68d19e8fbf Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 25 Jul 2022 11:36:14 -0700 Subject: [PATCH 1018/1056] mm/hmm: fault non-owner device private entries commit 8a295dbbaf7292c582a40ce469c326f472d51f66 upstream. If hmm_range_fault() is called with the HMM_PFN_REQ_FAULT flag and a device private PTE is found, the hmm_range::dev_private_owner page is used to determine if the device private page should not be faulted in. However, if the device private page is not owned by the caller, hmm_range_fault() returns an error instead of calling migrate_to_ram() to fault in the page. For example, if a page is migrated to GPU private memory and a RDMA fault capable NIC tries to read the migrated page, without this patch it will get an error. With this patch, the page will be migrated back to system memory and the NIC will be able to read the data. Link: https://lkml.kernel.org/r/20220727000837.4128709-2-rcampbell@nvidia.com Link: https://lkml.kernel.org/r/20220725183615.4118795-2-rcampbell@nvidia.com Fixes: 08ddddda667b ("mm/hmm: check the device private page owner in hmm_range_fault()") Signed-off-by: Ralph Campbell Reported-by: Felix Kuehling Reviewed-by: Alistair Popple Cc: Philip Yang Cc: Jason Gunthorpe Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/hmm.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index bd56641c79d4ee..3af995c814a66c 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -212,14 +212,6 @@ int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr, unsigned long end, unsigned long hmm_pfns[], pmd_t pmd); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static inline bool hmm_is_device_private_entry(struct hmm_range *range, - swp_entry_t entry) -{ - return is_device_private_entry(entry) && - pfn_swap_entry_to_page(entry)->pgmap->owner == - range->dev_private_owner; -} - static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte) { @@ -252,10 +244,12 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, swp_entry_t entry = pte_to_swp_entry(pte); /* - * Never fault in device private pages, but just report - * the PFN even if not present. + * Don't fault in device private pages owned by the caller, + * just report the PFN. */ - if (hmm_is_device_private_entry(range, entry)) { + if (is_device_private_entry(entry) && + pfn_swap_entry_to_page(entry)->pgmap->owner == + range->dev_private_owner) { cpu_flags = HMM_PFN_VALID; if (is_writable_device_private_entry(entry)) cpu_flags |= HMM_PFN_WRITE; @@ -273,6 +267,9 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, if (!non_swap_entry(entry)) goto fault; + if (is_device_private_entry(entry)) + goto fault; + if (is_device_exclusive_entry(entry)) goto fault; From 86e83233dd01096a485aef3a3caef36640c21b78 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Mon, 25 Jul 2022 18:52:12 +0900 Subject: [PATCH 1019/1056] page_alloc: fix invalid watermark check on a negative value commit 9282012fc0aa248b77a69f5eb802b67c5a16bb13 upstream. There was a report that a task is waiting at the throttle_direct_reclaim. The pgscan_direct_throttle in vmstat was increasing. This is a bug where zone_watermark_fast returns true even when the free is very low. The commit f27ce0e14088 ("page_alloc: consider highatomic reserve in watermark fast") changed the watermark fast to consider highatomic reserve. But it did not handle a negative value case which can be happened when reserved_highatomic pageblock is bigger than the actual free. If watermark is considered as ok for the negative value, allocating contexts for order-0 will consume all free pages without direct reclaim, and finally free page may become depleted except highatomic free. Then allocating contexts may fall into throttle_direct_reclaim. This symptom may easily happen in a system where wmark min is low and other reclaimers like kswapd does not make free pages quickly. Handle the negative case by using MIN. Link: https://lkml.kernel.org/r/20220725095212.25388-1-jaewon31.kim@samsung.com Fixes: f27ce0e14088 ("page_alloc: consider highatomic reserve in watermark fast") Signed-off-by: Jaewon Kim Reported-by: GyeongHwan Hong Acked-by: Mel Gorman Cc: Minchan Kim Cc: Baoquan He Cc: Vlastimil Babka Cc: Johannes Weiner Cc: Michal Hocko Cc: Yong-Taek Lee Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a0b7afae59e9cb..61d7967897ced4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3928,11 +3928,15 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order, * need to be calculated. */ if (!order) { - long fast_free; + long usable_free; + long reserved; - fast_free = free_pages; - fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags); - if (fast_free > mark + z->lowmem_reserve[highest_zoneidx]) + usable_free = free_pages; + reserved = __zone_watermark_unusable_free(z, 0, alloc_flags); + + /* reserved may over estimate high-atomic reserves. */ + usable_free -= min(usable_free, reserved); + if (usable_free > mark + z->lowmem_reserve[highest_zoneidx]) return true; } From 30dc2effc74a729ae97d302b06708f8e6fb8ae36 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Jul 2022 17:33:21 +0100 Subject: [PATCH 1020/1056] ARM: 9216/1: Fix MAX_DMA_ADDRESS overflow [ Upstream commit fb0fd3469ead5b937293c213daa1f589b4b7ce46 ] Commit 26f09e9b3a06 ("mm/memblock: add memblock memory allocation apis") added a check to determine whether arm_dma_zone_size is exceeding the amount of kernel virtual address space available between the upper 4GB virtual address limit and PAGE_OFFSET in order to provide a suitable definition of MAX_DMA_ADDRESS that should fit within the 32-bit virtual address space. The quantity used for comparison was off by a missing trailing 0, leading to MAX_DMA_ADDRESS to be overflowing a 32-bit quantity. This was caught thanks to CONFIG_DEBUG_VIRTUAL on the bcm2711 platform where we define a dma_zone_size of 1GB and we have a PAGE_OFFSET value of 0xc000_0000 (CONFIG_VMSPLIT_3G) leading to MAX_DMA_ADDRESS being 0x1_0000_0000 which overflows the unsigned long type used throughout __pa() and then __virt_addr_valid(). Because the virtual address passed to __virt_addr_valid() would now be 0, the function would loudly warn and flood the kernel log, thus making the platform unable to boot properly. Fixes: 26f09e9b3a06 ("mm/memblock: add memblock memory allocation apis") Signed-off-by: Florian Fainelli Reviewed-by: Linus Walleij Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/include/asm/dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h index a81dda65c57622..45180a2cc47cbc 100644 --- a/arch/arm/include/asm/dma.h +++ b/arch/arm/include/asm/dma.h @@ -10,7 +10,7 @@ #else #define MAX_DMA_ADDRESS ({ \ extern phys_addr_t arm_dma_zone_size; \ - arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \ + arm_dma_zone_size && arm_dma_zone_size < (0x100000000ULL - PAGE_OFFSET) ? \ (PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; }) #endif From 7bada8b0bdf1c22260bf8c71669778fb1bdfc266 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 21 Jul 2022 12:05:03 -0600 Subject: [PATCH 1021/1056] EDAC/ghes: Set the DIMM label unconditionally commit 5e2805d5379619c4a2e3ae4994e73b36439f4bad upstream. The commit cb51a371d08e ("EDAC/ghes: Setup DIMM label from DMI and use it in error reports") enforced that both the bank and device strings passed to dimm_setup_label() are not NULL. However, there are BIOSes, for example on a HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 03/15/2019 which don't populate both strings: Handle 0x0020, DMI type 17, 84 bytes Memory Device Array Handle: 0x0013 Error Information Handle: Not Provided Total Width: 72 bits Data Width: 64 bits Size: 32 GB Form Factor: DIMM Set: None Locator: PROC 1 DIMM 1 <===== device Bank Locator: Not Specified <===== bank This results in a buffer overflow because ghes_edac_register() calls strlen() on an uninitialized label, which had non-zero values left over from krealloc_array(): detected buffer overflow in __fortify_strlen ------------[ cut here ]------------ kernel BUG at lib/string_helpers.c:983! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 1 Comm: swapper/0 Tainted: G I 5.18.6-200.fc36.x86_64 #1 Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 03/15/2019 RIP: 0010:fortify_panic ... Call Trace: ghes_edac_register.cold ghes_probe platform_probe really_probe __driver_probe_device driver_probe_device __driver_attach ? __device_attach_driver bus_for_each_dev bus_add_driver driver_register acpi_ghes_init acpi_init ? acpi_sleep_proc_init do_one_initcall The label contains garbage because the commit in Fixes reallocs the DIMMs array while scanning the system but doesn't clear the newly allocated memory. Change dimm_setup_label() to always initialize the label to fix the issue. Set it to the empty string in case BIOS does not provide both bank and device so that ghes_edac_register() can keep the default label given by edac_mc_alloc_dimms(). [ bp: Rewrite commit message. ] Fixes: b9cae27728d1f ("EDAC/ghes: Scan the system once on driver init") Co-developed-by: Robert Richter Signed-off-by: Robert Richter Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Tested-by: Robert Elliott Cc: Link: https://lore.kernel.org/r/20220719220124.760359-1-toshi.kani@hpe.com Signed-off-by: Greg Kroah-Hartman --- drivers/edac/ghes_edac.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 6d1ddecbf0da36..d0a9ccf640c4bb 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -101,9 +101,14 @@ static void dimm_setup_label(struct dimm_info *dimm, u16 handle) dmi_memdev_name(handle, &bank, &device); - /* both strings must be non-zero */ - if (bank && *bank && device && *device) - snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device); + /* + * Set to a NULL string when both bank and device are zero. In this case, + * the label assigned by default will be preserved. + */ + snprintf(dimm->label, sizeof(dimm->label), "%s%s%s", + (bank && *bank) ? bank : "", + (bank && *bank && device && *device) ? " " : "", + (device && *device) ? device : ""); } static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry) From 66d31cef4806d9bf751aee4f3759abd55217e0d0 Mon Sep 17 00:00:00 2001 From: Eiichi Tsukata Date: Thu, 28 Jul 2022 04:39:07 +0000 Subject: [PATCH 1022/1056] docs/kernel-parameters: Update descriptions for "mitigations=" param with retbleed commit ea304a8b89fd0d6cf94ee30cb139dc23d9f1a62f upstream. Updates descriptions for "mitigations=off" and "mitigations=auto,nosmt" with the respective retbleed= settings. Signed-off-by: Eiichi Tsukata Signed-off-by: Borislav Petkov Cc: corbet@lwn.net Link: https://lore.kernel.org/r/20220728043907.165688-1-eiichi.tsukata@nutanix.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2c556a127979e9..b47905c4a92fd9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3020,6 +3020,7 @@ no_entry_flush [PPC] no_uaccess_flush [PPC] mmio_stale_data=off [X86] + retbleed=off [X86] Exceptions: This does not have any effect on @@ -3042,6 +3043,7 @@ mds=full,nosmt [X86] tsx_async_abort=full,nosmt [X86] mmio_stale_data=full,nosmt [X86] + retbleed=auto,nosmt [X86] mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this From d10e819d13f798c924cd75906573144042b4fdf0 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 22 Jun 2022 16:04:19 -0400 Subject: [PATCH 1023/1056] locking/rwsem: Allow slowpath writer to ignore handoff bit if not set by first waiter commit 6eebd5fb20838f5971ba17df9f55cc4f84a31053 upstream. With commit d257cc8cb8d5 ("locking/rwsem: Make handoff bit handling more consistent"), the writer that sets the handoff bit can be interrupted out without clearing the bit if the wait queue isn't empty. This disables reader and writer optimistic lock spinning and stealing. Now if a non-first writer in the queue is somehow woken up or a new waiter enters the slowpath, it can't acquire the lock. This is not the case before commit d257cc8cb8d5 as the writer that set the handoff bit will clear it when exiting out via the out_nolock path. This is less efficient as the busy rwsem stays in an unlock state for a longer time. In some cases, this new behavior may cause lockups as shown in [1] and [2]. This patch allows a non-first writer to ignore the handoff bit if it is not originally set or initiated by the first waiter. This patch is shown to be effective in fixing the lockup problem reported in [1]. [1] https://lore.kernel.org/lkml/20220617134325.GC30825@techsingularity.net/ [2] https://lore.kernel.org/lkml/3f02975c-1a9d-be20-32cf-f1d8e3dfafcc@oracle.com/ Fixes: d257cc8cb8d5 ("locking/rwsem: Make handoff bit handling more consistent") Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Donnelly Tested-by: Mel Gorman Link: https://lore.kernel.org/r/20220622200419.778799-1-longman@redhat.com Signed-off-by: Greg Kroah-Hartman --- kernel/locking/rwsem.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index e63f740c2cc840..4cc73e6f8974ba 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -335,8 +335,6 @@ struct rwsem_waiter { struct task_struct *task; enum rwsem_waiter_type type; unsigned long timeout; - - /* Writer only, not initialized in reader */ bool handoff_set; }; #define rwsem_first_waiter(sem) \ @@ -456,10 +454,12 @@ static void rwsem_mark_wake(struct rw_semaphore *sem, * to give up the lock), request a HANDOFF to * force the issue. */ - if (!(oldcount & RWSEM_FLAG_HANDOFF) && - time_after(jiffies, waiter->timeout)) { - adjustment -= RWSEM_FLAG_HANDOFF; - lockevent_inc(rwsem_rlock_handoff); + if (time_after(jiffies, waiter->timeout)) { + if (!(oldcount & RWSEM_FLAG_HANDOFF)) { + adjustment -= RWSEM_FLAG_HANDOFF; + lockevent_inc(rwsem_rlock_handoff); + } + waiter->handoff_set = true; } atomic_long_add(-adjustment, &sem->count); @@ -569,7 +569,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem, static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, struct rwsem_waiter *waiter) { - bool first = rwsem_first_waiter(sem) == waiter; + struct rwsem_waiter *first = rwsem_first_waiter(sem); long count, new; lockdep_assert_held(&sem->wait_lock); @@ -579,11 +579,20 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF); if (has_handoff) { - if (!first) + /* + * Honor handoff bit and yield only when the first + * waiter is the one that set it. Otherwisee, we + * still try to acquire the rwsem. + */ + if (first->handoff_set && (waiter != first)) return false; - /* First waiter inherits a previously set handoff bit */ - waiter->handoff_set = true; + /* + * First waiter can inherit a previously set handoff + * bit and spin on rwsem if lock acquisition fails. + */ + if (waiter == first) + waiter->handoff_set = true; } new = count; @@ -978,6 +987,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat waiter.task = current; waiter.type = RWSEM_WAITING_FOR_READ; waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT; + waiter.handoff_set = false; raw_spin_lock_irq(&sem->wait_lock); if (list_empty(&sem->wait_list)) { From f0e42e43795db4c1054295c0d35e79203db83449 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 28 Jul 2022 09:26:02 -0300 Subject: [PATCH 1024/1056] x86/bugs: Do not enable IBPB at firmware entry when IBPB is not available commit 571c30b1a88465a1c85a6f7762609939b9085a15 upstream. Some cloud hypervisors do not provide IBPB on very recent CPU processors, including AMD processors affected by Retbleed. Using IBPB before firmware calls on such systems would cause a GPF at boot like the one below. Do not enable such calls when IBPB support is not present. EFI Variables Facility v0.08 2004-May-17 general protection fault, maybe for address 0x1: 0000 [#1] PREEMPT SMP NOPTI CPU: 0 PID: 24 Comm: kworker/u2:1 Not tainted 5.19.0-rc8+ #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 Workqueue: efi_rts_wq efi_call_rts RIP: 0010:efi_call_rts Code: e8 37 33 58 ff 41 bf 48 00 00 00 49 89 c0 44 89 f9 48 83 c8 01 4c 89 c2 48 c1 ea 20 66 90 b9 49 00 00 00 b8 01 00 00 00 31 d2 <0f> 30 e8 7b 9f 5d ff e8 f6 f8 ff ff 4c 89 f1 4c 89 ea 4c 89 e6 48 RSP: 0018:ffffb373800d7e38 EFLAGS: 00010246 RAX: 0000000000000001 RBX: 0000000000000006 RCX: 0000000000000049 RDX: 0000000000000000 RSI: ffff94fbc19d8fe0 RDI: ffff94fbc1b2b300 RBP: ffffb373800d7e70 R08: 0000000000000000 R09: 0000000000000000 R10: 000000000000000b R11: 000000000000000b R12: ffffb3738001fd78 R13: ffff94fbc2fcfc00 R14: ffffb3738001fd80 R15: 0000000000000048 FS: 0000000000000000(0000) GS:ffff94fc3da00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff94fc30201000 CR3: 000000006f610000 CR4: 00000000000406f0 Call Trace: ? __wake_up process_one_work worker_thread ? rescuer_thread kthread ? kthread_complete_and_exit ret_from_fork Modules linked in: Fixes: 28a99e95f55c ("x86/amd: Use IBPB for firmware calls") Reported-by: Dimitri John Ledkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Borislav Petkov Cc: Link: https://lore.kernel.org/r/20220728122602.2500509-1-cascardo@canonical.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 18a7ea1cffdaca..a37814c8547e41 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1513,6 +1513,7 @@ static void __init spectre_v2_select_mitigation(void) * enable IBRS around firmware calls. */ if (boot_cpu_has_bug(X86_BUG_RETBLEED) && + boot_cpu_has(X86_FEATURE_IBPB) && (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) { From d676d6149a2f4b4d66b8ea0a1dfef30a54cf5750 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Aug 2022 12:03:56 +0200 Subject: [PATCH 1025/1056] Linux 5.15.59 Link: https://lore.kernel.org/r/20220801114134.468284027@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Florian Fainelli Tested-by: Linux Kernel Functional Testing Tested-by: Shuah Khan Tested-by: Bagas Sanjaya Tested-by: Guenter Roeck Tested-by: Ron Economos Tested-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d7ba0de250cbde..22bca3948306b4 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 58 +SUBLEVEL = 59 EXTRAVERSION = NAME = Trick or Treat From df9692b8a31902d8f0ae9e3f81cb4465431e22cc Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 23 Jul 2022 17:22:47 +0200 Subject: [PATCH 1026/1056] x86/speculation: Make all RETbleed mitigations 64-bit only commit b648ab487f31bc4c38941bc770ea97fe394304bb upstream. The mitigations for RETBleed are currently ineffective on x86_32 since entry_32.S does not use the required macros. However, for an x86_32 target, the kconfig symbols for them are still enabled by default and /sys/devices/system/cpu/vulnerabilities/retbleed will wrongly report that mitigations are in place. Make all of these symbols depend on X86_64, and only enable RETHUNK by default on X86_64. Fixes: f43b9876e857 ("x86/retbleed: Add fine grained Kconfig knobs") Signed-off-by: Ben Hutchings Signed-off-by: Borislav Petkov Cc: Link: https://lore.kernel.org/r/YtwSR3NNsWp1ohfV@decadent.org.uk [bwh: Backported to 5.10/5.15/5.18: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a170cfdae2a7aa..fe6981a3879571 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2427,7 +2427,7 @@ config RETPOLINE config RETHUNK bool "Enable return-thunks" depends on RETPOLINE && CC_HAS_RETURN_THUNK - default y + default y if X86_64 help Compile the kernel with the return-thunks compiler option to guard against kernel-to-user data leaks by avoiding return speculation. @@ -2436,21 +2436,21 @@ config RETHUNK config CPU_UNRET_ENTRY bool "Enable UNRET on kernel entry" - depends on CPU_SUP_AMD && RETHUNK + depends on CPU_SUP_AMD && RETHUNK && X86_64 default y help Compile the kernel with support for the retbleed=unret mitigation. config CPU_IBPB_ENTRY bool "Enable IBPB on kernel entry" - depends on CPU_SUP_AMD + depends on CPU_SUP_AMD && X86_64 default y help Compile the kernel with support for the retbleed=ibpb mitigation. config CPU_IBRS_ENTRY bool "Enable IBRS on kernel entry" - depends on CPU_SUP_INTEL + depends on CPU_SUP_INTEL && X86_64 default y help Compile the kernel with support for the spectre_v2=ibrs mitigation. From 119debdb9f25b1b24f6c52aa9b8a4aa4b51e2465 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 1 Aug 2022 17:48:50 +0300 Subject: [PATCH 1027/1056] selftests/bpf: Extend verifier and bpf_sock tests for dst_port loads commit 8f50f16ff39dd4e2d43d1548ca66925652f8aff7 upstream. Add coverage to the verifier tests and tests for reading bpf_sock fields to ensure that 32-bit, 16-bit, and 8-bit loads from dst_port field are allowed only at intended offsets and produce expected values. While 16-bit and 8-bit access to dst_port field is straight-forward, 32-bit wide loads need be allowed and produce a zero-padded 16-bit value for backward compatibility. Signed-off-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20220130115518.213259-3-jakub@cloudflare.com Signed-off-by: Alexei Starovoitov Signed-off-by: Ovidiu Panait Signed-off-by: Greg Kroah-Hartman --- tools/include/uapi/linux/bpf.h | 3 +- .../selftests/bpf/prog_tests/sock_fields.c | 58 +++++++++---- .../selftests/bpf/progs/test_sock_fields.c | 41 ++++++++++ tools/testing/selftests/bpf/verifier/sock.c | 81 ++++++++++++++++++- 4 files changed, 162 insertions(+), 21 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e2c8f946c5416b..8330e3ca8fbfb9 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5347,7 +5347,8 @@ struct bpf_sock { __u32 src_ip4; __u32 src_ip6[4]; __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ + __be16 dst_port; /* network byte order */ + __u16 :16; /* zero padding */ __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index 577d619fb07ed9..197ec1d1b70264 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -1,9 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ +#define _GNU_SOURCE #include #include #include +#include #include #include #include @@ -21,6 +23,7 @@ enum bpf_linum_array_idx { EGRESS_LINUM_IDX, INGRESS_LINUM_IDX, + READ_SK_DST_PORT_LINUM_IDX, __NR_BPF_LINUM_ARRAY_IDX, }; @@ -43,8 +46,16 @@ static __u64 child_cg_id; static int linum_map_fd; static __u32 duration; -static __u32 egress_linum_idx = EGRESS_LINUM_IDX; -static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; +static bool create_netns(void) +{ + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) + return false; + + if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo")) + return false; + + return true; +} static void print_sk(const struct bpf_sock *sk, const char *prefix) { @@ -92,19 +103,24 @@ static void check_result(void) { struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; struct bpf_sock srv_sk, cli_sk, listen_sk; - __u32 ingress_linum, egress_linum; + __u32 idx, ingress_linum, egress_linum, linum; int err; - err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, - &egress_linum); + idx = EGRESS_LINUM_IDX; + err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum); CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)", "err:%d errno:%d\n", err, errno); - err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, - &ingress_linum); + idx = INGRESS_LINUM_IDX; + err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum); CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)", "err:%d errno:%d\n", err, errno); + idx = READ_SK_DST_PORT_LINUM_IDX; + err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum); + ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)"); + ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line"); + memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk)); memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp)); memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk)); @@ -263,7 +279,7 @@ static void test(void) char buf[DATA_LEN]; /* Prepare listen_fd */ - listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0); /* start_server() has logged the error details */ if (CHECK_FAIL(listen_fd == -1)) goto done; @@ -331,8 +347,12 @@ static void test(void) void test_sock_fields(void) { - struct bpf_link *egress_link = NULL, *ingress_link = NULL; int parent_cg_fd = -1, child_cg_fd = -1; + struct bpf_link *link; + + /* Use a dedicated netns to have a fixed listen port */ + if (!create_netns()) + return; /* Create a cgroup, get fd, and join it */ parent_cg_fd = test__join_cgroup(PARENT_CGROUP); @@ -353,15 +373,20 @@ void test_sock_fields(void) if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n")) goto done; - egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, - child_cg_fd); - if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)")) + link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd); + if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)")) + goto done; + skel->links.egress_read_sock_fields = link; + + link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd); + if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)")) goto done; + skel->links.ingress_read_sock_fields = link; - ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, - child_cg_fd); - if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)")) + link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd); + if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port")) goto done; + skel->links.read_sk_dst_port = link; linum_map_fd = bpf_map__fd(skel->maps.linum_map); sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt); @@ -370,8 +395,7 @@ void test_sock_fields(void) test(); done: - bpf_link__destroy(egress_link); - bpf_link__destroy(ingress_link); + test_sock_fields__detach(skel); test_sock_fields__destroy(skel); if (child_cg_fd >= 0) close(child_cg_fd); diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index 7967348b11af69..3e2e3ee51cc9ed 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -12,6 +12,7 @@ enum bpf_linum_array_idx { EGRESS_LINUM_IDX, INGRESS_LINUM_IDX, + READ_SK_DST_PORT_LINUM_IDX, __NR_BPF_LINUM_ARRAY_IDX, }; @@ -250,4 +251,44 @@ int ingress_read_sock_fields(struct __sk_buff *skb) return CG_OK; } +static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk) +{ + __u32 *word = (__u32 *)&sk->dst_port; + return word[0] == bpf_htonl(0xcafe0000); +} + +static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk) +{ + __u16 *half = (__u16 *)&sk->dst_port; + return half[0] == bpf_htons(0xcafe); +} + +static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk) +{ + __u8 *byte = (__u8 *)&sk->dst_port; + return byte[0] == 0xca && byte[1] == 0xfe; +} + +SEC("cgroup_skb/egress") +int read_sk_dst_port(struct __sk_buff *skb) +{ + __u32 linum, linum_idx; + struct bpf_sock *sk; + + linum_idx = READ_SK_DST_PORT_LINUM_IDX; + + sk = skb->sk; + if (!sk) + RET_LOG(); + + if (!sk_dst_port__load_word(sk)) + RET_LOG(); + if (!sk_dst_port__load_half(sk)) + RET_LOG(); + if (!sk_dst_port__load_byte(sk)) + RET_LOG(); + + return CG_OK; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index ce13ece08d51cb..8c224eac93df71 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -121,7 +121,25 @@ .result = ACCEPT, }, { - "sk_fullsock(skb->sk): sk->dst_port [narrow load]", + "sk_fullsock(skb->sk): sk->dst_port [word load] (backward compatibility)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [half load]", .insns = { BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), @@ -139,7 +157,64 @@ .result = ACCEPT, }, { - "sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]", + "sk_fullsock(skb->sk): sk->dst_port [half load] (invalid)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock access", +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [byte load]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port)), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [byte load] (invalid)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock access", +}, +{ + "sk_fullsock(skb->sk): past sk->dst_port [half load] (invalid)", .insns = { BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), @@ -149,7 +224,7 @@ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, dst_port)), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, From e889a4c440eb64296857b50a1fc2c9e57cc9c0d8 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 1 Aug 2022 17:48:51 +0300 Subject: [PATCH 1028/1056] selftests/bpf: Check dst_port only on the client socket commit 2d2202ba858c112b03f84d546e260c61425831a1 upstream. cgroup_skb/egress programs which sock_fields test installs process packets flying in both directions, from the client to the server, and in reverse direction. Recently added dst_port check relies on the fact that destination port (remote peer port) of the socket which sends the packet is known ahead of time. This holds true only for the client socket, which connects to the known server port. Filter out any traffic that is not egressing from the client socket in the BPF program that tests reading the dst_port. Fixes: 8f50f16ff39d ("selftests/bpf: Extend verifier and bpf_sock tests for dst_port loads") Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220317113920.1068535-3-jakub@cloudflare.com Signed-off-by: Ovidiu Panait Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/bpf/progs/test_sock_fields.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index 3e2e3ee51cc9ed..43b31aa1fcf727 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -281,6 +281,10 @@ int read_sk_dst_port(struct __sk_buff *skb) if (!sk) RET_LOG(); + /* Ignore everything but the SYN from the client socket */ + if (sk->state != BPF_TCP_SYN_SENT) + return CG_OK; + if (!sk_dst_port__load_word(sk)) RET_LOG(); if (!sk_dst_port__load_half(sk)) From 7ad47f414b4095fa104daaf08c8c942308f5a5c4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:26 +0200 Subject: [PATCH 1029/1056] block: fix default IO priority handling again commit e589f46445960c274cc813a1cc8e2fc73b2a1849 upstream. Commit e70344c05995 ("block: fix default IO priority handling") introduced an inconsistency in get_current_ioprio() that tasks without IO context return IOPRIO_DEFAULT priority while tasks with freshly allocated IO context will return 0 (IOPRIO_CLASS_NONE/0) IO priority. Tasks without IO context used to be rare before 5a9d041ba2f6 ("block: move io_context creation into where it's needed") but after this commit they became common because now only BFQ IO scheduler setups task's IO context. Similar inconsistency is there for get_task_ioprio() so this inconsistency is now exposed to userspace and userspace will see different IO priority for tasks operating on devices with BFQ compared to devices without BFQ. Furthemore the changes done by commit e70344c05995 change the behavior when no IO priority is set for BFQ IO scheduler which is also documented in ioprio_set(2) manpage: "If no I/O scheduler has been set for a thread, then by default the I/O priority will follow the CPU nice value (setpriority(2)). In Linux kernels before version 2.6.24, once an I/O priority had been set using ioprio_set(), there was no way to reset the I/O scheduling behavior to the default. Since Linux 2.6.24, specifying ioprio as 0 can be used to reset to the default I/O scheduling behavior." So make sure we default to IOPRIO_CLASS_NONE as used to be the case before commit e70344c05995. Also cleanup alloc_io_context() to explicitely set this IO priority for the allocated IO context to avoid future surprises. Note that we tweak ioprio_best() to maintain ioprio_get(2) behavior and make this commit easily backportable. CC: stable@vger.kernel.org Fixes: e70344c05995 ("block: fix default IO priority handling") Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-1-jack@suse.cz Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-ioc.c | 1 + block/ioprio.c | 4 ++-- include/linux/ioprio.h | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 57299f860d41eb..90c05971f71e07 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -265,6 +265,7 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node) INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC); INIT_HLIST_HEAD(&ioc->icq_list); INIT_WORK(&ioc->release_work, ioc_release_fn); + ioc->ioprio = IOPRIO_DEFAULT; /* * Try to install. ioc shouldn't be installed if someone else diff --git a/block/ioprio.c b/block/ioprio.c index 6f01d35a5145a6..6c830154856fc6 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -189,9 +189,9 @@ static int get_task_ioprio(struct task_struct *p) int ioprio_best(unsigned short aprio, unsigned short bprio) { if (!ioprio_valid(aprio)) - aprio = IOPRIO_DEFAULT; + aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM); if (!ioprio_valid(bprio)) - bprio = IOPRIO_DEFAULT; + bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM); return min(aprio, bprio); } diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 3f53bc27a19bfc..3d088a88f83202 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -11,7 +11,7 @@ /* * Default IO priority. */ -#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM) +#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0) /* * Check that a priority value has a valid class. From 9894717519cc144827bf8a46b543363533e8c0fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Thu, 23 Jun 2022 16:45:52 -0500 Subject: [PATCH 1030/1056] tools/vm/slabinfo: Handle files in debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0c7e0d699ef1430d7f4cf12b4b1d097af58b5515 upstream. Commit 64dd68497be76 relocated and renamed the alloc_calls and free_calls files from /sys/kernel/slab/NAME/*_calls over to /sys/kernel/debug/slab/NAME/*_calls but didn't update the slabinfo tool with the new location. This change will now have slabinfo look at the new location (and filenames) with a fallback to the prior files. Fixes: 64dd68497be76 ("mm: slub: move sysfs slab alloc/free interfaces to debugfs") Cc: stable@vger.kernel.org Signed-off-by: Stéphane Graber Tested-by: Stéphane Graber Signed-off-by: Vlastimil Babka Signed-off-by: Greg Kroah-Hartman --- tools/vm/slabinfo.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 9b68658b6bb859..5b98f3ee58a58b 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -233,6 +233,24 @@ static unsigned long read_slab_obj(struct slabinfo *s, const char *name) return l; } +static unsigned long read_debug_slab_obj(struct slabinfo *s, const char *name) +{ + char x[128]; + FILE *f; + size_t l; + + snprintf(x, 128, "/sys/kernel/debug/slab/%s/%s", s->name, name); + f = fopen(x, "r"); + if (!f) { + buffer[0] = 0; + l = 0; + } else { + l = fread(buffer, 1, sizeof(buffer), f); + buffer[l] = 0; + fclose(f); + } + return l; +} /* * Put a size string together @@ -409,14 +427,18 @@ static void show_tracking(struct slabinfo *s) { printf("\n%s: Kernel object allocation\n", s->name); printf("-----------------------------------------------------------------------\n"); - if (read_slab_obj(s, "alloc_calls")) + if (read_debug_slab_obj(s, "alloc_traces")) + printf("%s", buffer); + else if (read_slab_obj(s, "alloc_calls")) printf("%s", buffer); else printf("No Data\n"); printf("\n%s: Kernel object freeing\n", s->name); printf("------------------------------------------------------------------------\n"); - if (read_slab_obj(s, "free_calls")) + if (read_debug_slab_obj(s, "free_traces")) + printf("%s", buffer); + else if (read_slab_obj(s, "free_calls")) printf("%s", buffer); else printf("No Data\n"); From 3a5fab5c45056a8d8c33880b113ae40cf23500e8 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Thu, 7 Jul 2022 20:09:52 +0200 Subject: [PATCH 1031/1056] ACPI: video: Force backlight native for some TongFang devices commit c752089f7cf5b5800c6ace4cdd1a8351ee78a598 upstream. The TongFang PF5PU1G, PF4NU1F, PF5NU1G, and PF5LUXG/TUXEDO BA15 Gen10, Pulse 14/15 Gen1, and Pulse 15 Gen2 have the same problem as the Clevo NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2: They have a working native and video interface. However the default detection mechanism first registers the video interface before unregistering it again and switching to the native interface during boot. This results in a dangling SBIOS request for backlight change for some reason, causing the backlight to switch to ~2% once per boot on the first power cord connect or disconnect event. Setting the native interface explicitly circumvents this buggy behaviour by avoiding the unregistering process. Signed-off-by: Werner Sembach Cc: All applicable Reviewed-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video_detect.c | 51 ++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 7b9793cb55c504..3021254597f95f 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -484,7 +484,56 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), }, }, - + /* + * The TongFang PF5PU1G, PF4NU1F, PF5NU1G, and PF5LUXG/TUXEDO BA15 Gen10, + * Pulse 14/15 Gen1, and Pulse 15 Gen2 have the same problem as the Clevo + * NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2. See the description + * above. + */ + { + .callback = video_detect_force_native, + .ident = "TongFang PF5PU1G", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PF5PU1G"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang PF4NU1F", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PF4NU1F"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang PF4NU1F", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "PULSE1401"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang PF5NU1G", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PF5NU1G"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang PF5NU1G", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "PULSE1501"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang PF5LUXG", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PF5LUXG"), + }, + }, /* * Desktops which falsely report a backlight and which our heuristics * for this do not catch. From e7170bcda6137ed682c59da713c0af44d8f706e5 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Thu, 7 Jul 2022 20:09:53 +0200 Subject: [PATCH 1032/1056] ACPI: video: Shortening quirk list by identifying Clevo by board_name only commit f0341e67b3782603737f7788e71bd3530012a4f4 upstream. Taking a recent change in the i8042 quirklist to this one: Clevo board_names are somewhat unique, and if not: The generic Board_-/Sys_Vendor string "Notebook" doesn't help much anyway. So identifying the devices just by the board_name helps keeping the list significantly shorter and might even hit more devices requiring the fix. Signed-off-by: Werner Sembach Fixes: c844d22fe0c0 ("ACPI: video: Force backlight native for Clevo NL5xRU and NL5xNU") Cc: All applicable Reviewed-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video_detect.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 3021254597f95f..e39d59ad64964e 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -424,23 +424,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = { .callback = video_detect_force_native, .ident = "Clevo NL5xRU", .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), - DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), - }, - }, - { - .callback = video_detect_force_native, - .ident = "Clevo NL5xRU", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"), - DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), - }, - }, - { - .callback = video_detect_force_native, - .ident = "Clevo NL5xRU", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), }, }, @@ -464,23 +447,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = { .callback = video_detect_force_native, .ident = "Clevo NL5xNU", .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), - DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), - }, - }, - { - .callback = video_detect_force_native, - .ident = "Clevo NL5xNU", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"), - DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), - }, - }, - { - .callback = video_detect_force_native, - .ident = "Clevo NL5xNU", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), }, }, From 397c2116cbe26a9f09e825ad394e802804bc182a Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 22 Jun 2022 10:09:06 -0700 Subject: [PATCH 1033/1056] ACPI: APEI: Better fix to avoid spamming the console with old error logs commit c3481b6b75b4797657838f44028fd28226ab48e0 upstream. The fix in commit 3f8dec116210 ("ACPI/APEI: Limit printable size of BERT table data") does not work as intended on systems where the BIOS has a fixed size block of memory for the BERT table, relying on s/w to quit when it finds a record with estatus->block_status == 0. On these systems all errors are suppressed because the check: if (region_len < ACPI_BERT_PRINT_MAX_LEN) always fails. New scheme skips individual CPER records that are too large, and also limits the total number of records that will be printed to 5. Fixes: 3f8dec116210 ("ACPI/APEI: Limit printable size of BERT table data") Cc: All applicable Signed-off-by: Tony Luck Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/apei/bert.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c index 598fd19b65fa48..45973aa6e06d48 100644 --- a/drivers/acpi/apei/bert.c +++ b/drivers/acpi/apei/bert.c @@ -29,16 +29,26 @@ #undef pr_fmt #define pr_fmt(fmt) "BERT: " fmt + +#define ACPI_BERT_PRINT_MAX_RECORDS 5 #define ACPI_BERT_PRINT_MAX_LEN 1024 static int bert_disable; +/* + * Print "all" the error records in the BERT table, but avoid huge spam to + * the console if the BIOS included oversize records, or too many records. + * Skipping some records here does not lose anything because the full + * data is available to user tools in: + * /sys/firmware/acpi/tables/data/BERT + */ static void __init bert_print_all(struct acpi_bert_region *region, unsigned int region_len) { struct acpi_hest_generic_status *estatus = (struct acpi_hest_generic_status *)region; int remain = region_len; + int printed = 0, skipped = 0; u32 estatus_len; while (remain >= sizeof(struct acpi_bert_region)) { @@ -46,24 +56,26 @@ static void __init bert_print_all(struct acpi_bert_region *region, if (remain < estatus_len) { pr_err(FW_BUG "Truncated status block (length: %u).\n", estatus_len); - return; + break; } /* No more error records. */ if (!estatus->block_status) - return; + break; if (cper_estatus_check(estatus)) { pr_err(FW_BUG "Invalid error record.\n"); - return; + break; } - pr_info_once("Error records from previous boot:\n"); - if (region_len < ACPI_BERT_PRINT_MAX_LEN) + if (estatus_len < ACPI_BERT_PRINT_MAX_LEN && + printed < ACPI_BERT_PRINT_MAX_RECORDS) { + pr_info_once("Error records from previous boot:\n"); cper_estatus_print(KERN_INFO HW_ERR, estatus); - else - pr_info_once("Max print length exceeded, table data is available at:\n" - "/sys/firmware/acpi/tables/data/BERT"); + printed++; + } else { + skipped++; + } /* * Because the boot error source is "one-time polled" type, @@ -75,6 +87,9 @@ static void __init bert_print_all(struct acpi_bert_region *region, estatus = (void *)estatus + estatus_len; remain -= estatus_len; } + + if (skipped) + pr_info(HW_ERR "Skipped %d error records\n", skipped); } static int __init setup_bert_disable(char *str) From 3d4c28475ee352c440b83484b72b1320ff76364a Mon Sep 17 00:00:00 2001 From: GUO Zihua Date: Fri, 22 Jul 2022 14:31:57 +0800 Subject: [PATCH 1034/1056] crypto: arm64/poly1305 - fix a read out-of-bound commit 7ae19d422c7da84b5f13bc08b98bd737a08d3a53 upstream. A kasan error was reported during fuzzing: BUG: KASAN: slab-out-of-bounds in neon_poly1305_blocks.constprop.0+0x1b4/0x250 [poly1305_neon] Read of size 4 at addr ffff0010e293f010 by task syz-executor.5/1646715 CPU: 4 PID: 1646715 Comm: syz-executor.5 Kdump: loaded Not tainted 5.10.0.aarch64 #1 Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.59 01/31/2019 Call trace: dump_backtrace+0x0/0x394 show_stack+0x34/0x4c arch/arm64/kernel/stacktrace.c:196 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x158/0x1e4 lib/dump_stack.c:118 print_address_description.constprop.0+0x68/0x204 mm/kasan/report.c:387 __kasan_report+0xe0/0x140 mm/kasan/report.c:547 kasan_report+0x44/0xe0 mm/kasan/report.c:564 check_memory_region_inline mm/kasan/generic.c:187 [inline] __asan_load4+0x94/0xd0 mm/kasan/generic.c:252 neon_poly1305_blocks.constprop.0+0x1b4/0x250 [poly1305_neon] neon_poly1305_do_update+0x6c/0x15c [poly1305_neon] neon_poly1305_update+0x9c/0x1c4 [poly1305_neon] crypto_shash_update crypto/shash.c:131 [inline] shash_finup_unaligned+0x84/0x15c crypto/shash.c:179 crypto_shash_finup+0x8c/0x140 crypto/shash.c:193 shash_digest_unaligned+0xb8/0xe4 crypto/shash.c:201 crypto_shash_digest+0xa4/0xfc crypto/shash.c:217 crypto_shash_tfm_digest+0xb4/0x150 crypto/shash.c:229 essiv_skcipher_setkey+0x164/0x200 [essiv] crypto_skcipher_setkey+0xb0/0x160 crypto/skcipher.c:612 skcipher_setkey+0x3c/0x50 crypto/algif_skcipher.c:305 alg_setkey+0x114/0x2a0 crypto/af_alg.c:220 alg_setsockopt+0x19c/0x210 crypto/af_alg.c:253 __sys_setsockopt+0x190/0x2e0 net/socket.c:2123 __do_sys_setsockopt net/socket.c:2134 [inline] __se_sys_setsockopt net/socket.c:2131 [inline] __arm64_sys_setsockopt+0x78/0x94 net/socket.c:2131 __invoke_syscall arch/arm64/kernel/syscall.c:36 [inline] invoke_syscall+0x64/0x100 arch/arm64/kernel/syscall.c:48 el0_svc_common.constprop.0+0x220/0x230 arch/arm64/kernel/syscall.c:155 do_el0_svc+0xb4/0xd4 arch/arm64/kernel/syscall.c:217 el0_svc+0x24/0x3c arch/arm64/kernel/entry-common.c:353 el0_sync_handler+0x160/0x164 arch/arm64/kernel/entry-common.c:369 el0_sync+0x160/0x180 arch/arm64/kernel/entry.S:683 This error can be reproduced by the following code compiled as ko on a system with kasan enabled: #include #include #include #include char test_data[] = "\x00\x01\x02\x03\x04\x05\x06\x07" "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" "\x10\x11\x12\x13\x14\x15\x16\x17" "\x18\x19\x1a\x1b\x1c\x1d\x1e"; int init(void) { struct crypto_shash *tfm = NULL; char *data = NULL, *out = NULL; tfm = crypto_alloc_shash("poly1305", 0, 0); data = kmalloc(POLY1305_KEY_SIZE - 1, GFP_KERNEL); out = kmalloc(POLY1305_DIGEST_SIZE, GFP_KERNEL); memcpy(data, test_data, POLY1305_KEY_SIZE - 1); crypto_shash_tfm_digest(tfm, data, POLY1305_KEY_SIZE - 1, out); kfree(data); kfree(out); return 0; } void deinit(void) { } module_init(init) module_exit(deinit) MODULE_LICENSE("GPL"); The root cause of the bug sits in neon_poly1305_blocks. The logic neon_poly1305_blocks() performed is that if it was called with both s[] and r[] uninitialized, it will first try to initialize them with the data from the first "block" that it believed to be 32 bytes in length. First 16 bytes are used as the key and the next 16 bytes for s[]. This would lead to the aforementioned read out-of-bound. However, after calling poly1305_init_arch(), only 16 bytes were deducted from the input and s[] is initialized yet again with the following 16 bytes. The second initialization of s[] is certainly redundent which indicates that the first initialization should be for r[] only. This patch fixes the issue by calling poly1305_init_arm64() instead of poly1305_init_arch(). This is also the implementation for the same algorithm on arm platform. Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation") Cc: stable@vger.kernel.org Signed-off-by: GUO Zihua Reviewed-by: Eric Biggers Acked-by: Will Deacon Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/poly1305-glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c index 9c3d86e397bf3a..1fae18ba11ed1f 100644 --- a/arch/arm64/crypto/poly1305-glue.c +++ b/arch/arm64/crypto/poly1305-glue.c @@ -52,7 +52,7 @@ static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, { if (unlikely(!dctx->sset)) { if (!dctx->rset) { - poly1305_init_arch(dctx, src); + poly1305_init_arm64(&dctx->h, src); src += POLY1305_BLOCK_SIZE; len -= POLY1305_BLOCK_SIZE; dctx->rset = 1; From 92343314d34e04da0923cefd3be67521d706fa35 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jun 2022 10:09:03 -0400 Subject: [PATCH 1035/1056] KVM: x86: do not report a vCPU as preempted outside instruction boundaries [ Upstream commit 6cd88243c7e03845a450795e134b488fc2afb736 ] If a vCPU is outside guest mode and is scheduled out, it might be in the process of making a memory access. A problem occurs if another vCPU uses the PV TLB flush feature during the period when the vCPU is scheduled out, and a virtual address has already been translated but has not yet been accessed, because this is equivalent to using a stale TLB entry. To avoid this, only report a vCPU as preempted if sure that the guest is at an instruction boundary. A rescheduling request will be delivered to the host physical CPU as an external interrupt, so for simplicity consider any vmexit *not* instruction boundary except for external interrupts. It would in principle be okay to report the vCPU as preempted also if it is sleeping in kvm_vcpu_block(): a TLB flush IPI will incur the vmentry/vmexit overhead unnecessarily, and optimistic spinning is also unlikely to succeed. However, leave it for later because right now kvm_vcpu_check_block() is doing memory accesses. Even though the TLB flush issue only applies to virtual memory address, it's very much preferrable to be conservative. Reported-by: Jann Horn Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/svm/svm.c | 2 ++ arch/x86/kvm/vmx/vmx.c | 1 + arch/x86/kvm/x86.c | 22 ++++++++++++++++++++++ 4 files changed, 28 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 49d814b2a341ad..a35f5e23fc2acd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -642,6 +642,7 @@ struct kvm_vcpu_arch { u64 ia32_misc_enable_msr; u64 smbase; u64 smi_count; + bool at_instruction_boundary; bool tpr_access_reporting; bool xsaves_enabled; u64 ia32_xss; @@ -1271,6 +1272,8 @@ struct kvm_vcpu_stat { u64 nested_run; u64 directed_yield_attempted; u64 directed_yield_successful; + u64 preemption_reported; + u64 preemption_other; u64 guest_mode; }; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 26f2da1590eda9..5b51156712f741 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4263,6 +4263,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) { + if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR) + vcpu->arch.at_instruction_boundary = true; } static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a236104fc7439d..359292767e1778 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6471,6 +6471,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) return; handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc)); + vcpu->arch.at_instruction_boundary = true; } static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bd410926fda592..b2436796e03c0b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -277,6 +277,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, nested_run), STATS_DESC_COUNTER(VCPU, directed_yield_attempted), STATS_DESC_COUNTER(VCPU, directed_yield_successful), + STATS_DESC_COUNTER(VCPU, preemption_reported), + STATS_DESC_COUNTER(VCPU, preemption_other), STATS_DESC_ICOUNTER(VCPU, guest_mode) }; @@ -4371,6 +4373,19 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) struct kvm_memslots *slots; static const u8 preempted = KVM_VCPU_PREEMPTED; + /* + * The vCPU can be marked preempted if and only if the VM-Exit was on + * an instruction boundary and will not trigger guest emulation of any + * kind (see vcpu_run). Vendor specific code controls (conservatively) + * when this is true, for example allowing the vCPU to be marked + * preempted if and only if the VM-Exit was due to a host interrupt. + */ + if (!vcpu->arch.at_instruction_boundary) { + vcpu->stat.preemption_other++; + return; + } + + vcpu->stat.preemption_reported++; if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; @@ -9934,6 +9949,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.l1tf_flush_l1d = true; for (;;) { + /* + * If another guest vCPU requests a PV TLB flush in the middle + * of instruction emulation, the rest of the emulation could + * use a stale page translation. Assume that any code after + * this point can start executing an instruction. + */ + vcpu->arch.at_instruction_boundary = false; if (kvm_vcpu_running(vcpu)) { r = vcpu_enter_guest(vcpu); } else { From ad6fd99d5febfd63465ef327240c78b6d803d48d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jun 2022 10:07:11 -0400 Subject: [PATCH 1036/1056] KVM: x86: do not set st->preempted when going back to user space [ Upstream commit 54aa83c90198e68eee8b0850c749bc70efb548da ] Similar to the Xen path, only change the vCPU's reported state if the vCPU was actually preempted. The reason for KVM's behavior is that for example optimistic spinning might not be a good idea if the guest is doing repeated exits to userspace; however, it is confusing and unlikely to make a difference, because well-tuned guests will hardly ever exit KVM_RUN in the first place. Suggested-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 26 ++++++++++++++------------ arch/x86/kvm/xen.h | 6 ++++-- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b2436796e03c0b..8a6ee5d8adc740 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4415,19 +4415,21 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { int idx; - if (vcpu->preempted && !vcpu->arch.guest_state_protected) - vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu); + if (vcpu->preempted) { + if (!vcpu->arch.guest_state_protected) + vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu); - /* - * Take the srcu lock as memslots will be accessed to check the gfn - * cache generation against the memslots generation. - */ - idx = srcu_read_lock(&vcpu->kvm->srcu); - if (kvm_xen_msr_enabled(vcpu->kvm)) - kvm_xen_runstate_set_preempted(vcpu); - else - kvm_steal_time_set_preempted(vcpu); - srcu_read_unlock(&vcpu->kvm->srcu, idx); + /* + * Take the srcu lock as memslots will be accessed to check the gfn + * cache generation against the memslots generation. + */ + idx = srcu_read_lock(&vcpu->kvm->srcu); + if (kvm_xen_msr_enabled(vcpu->kvm)) + kvm_xen_runstate_set_preempted(vcpu); + else + kvm_steal_time_set_preempted(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + } static_call(kvm_x86_vcpu_put)(vcpu); vcpu->arch.last_host_tsc = rdtsc(); diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index cc0cf5f37450b0..a7693a286e4014 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -97,8 +97,10 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu) * behalf of the vCPU. Only if the VMM does actually block * does it need to enter RUNSTATE_blocked. */ - if (vcpu->preempted) - kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable); + if (WARN_ON_ONCE(!vcpu->preempted)) + return; + + kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable); } /* 32-bit compatibility definitions, also used natively in 32-bit build */ From 9acd899d2febb4dd88565514d263fcdf0514ec26 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 1 Jun 2022 16:43:22 +0200 Subject: [PATCH 1037/1056] KVM: selftests: Make hyperv_clock selftest more stable [ Upstream commit eae260be3a0111a28fe95923e117a55dddec0384 ] hyperv_clock doesn't always give a stable test result, especially with AMD CPUs. The test compares Hyper-V MSR clocksource (acquired either with rdmsr() from within the guest or KVM_GET_MSRS from the host) against rdtsc(). To increase the accuracy, increase the measured delay (done with nop loop) by two orders of magnitude and take the mean rdtsc() value before and after rdmsr()/KVM_GET_MSRS. Reported-by: Maxim Levitsky Signed-off-by: Vitaly Kuznetsov Reviewed-by: Maxim Levitsky Tested-by: Maxim Levitsky Message-Id: <20220601144322.1968742-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- tools/testing/selftests/kvm/x86_64/hyperv_clock.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c index e0b2bb1339b162..3330fb183c6800 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -44,7 +44,7 @@ static inline void nop_loop(void) { int i; - for (i = 0; i < 1000000; i++) + for (i = 0; i < 100000000; i++) asm volatile("nop"); } @@ -56,12 +56,14 @@ static inline void check_tsc_msr_rdtsc(void) tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY); GUEST_ASSERT(tsc_freq > 0); - /* First, check MSR-based clocksource */ + /* For increased accuracy, take mean rdtsc() before and afrer rdmsr() */ r1 = rdtsc(); t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + r1 = (r1 + rdtsc()) / 2; nop_loop(); r2 = rdtsc(); t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + r2 = (r2 + rdtsc()) / 2; GUEST_ASSERT(r2 > r1 && t2 > t1); @@ -181,12 +183,14 @@ static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm) tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY); TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero"); - /* First, check MSR-based clocksource */ + /* For increased accuracy, take mean rdtsc() before and afrer ioctl */ r1 = rdtsc(); t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT); + r1 = (r1 + rdtsc()) / 2; nop_loop(); r2 = rdtsc(); t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT); + r2 = (r2 + rdtsc()) / 2; TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2); From bc2cee443c7401a15227bb7f9e7e0a967563cfd2 Mon Sep 17 00:00:00 2001 From: Dmitry Klochkov Date: Tue, 14 Jun 2022 15:11:41 +0300 Subject: [PATCH 1038/1056] tools/kvm_stat: fix display of error when multiple processes are found [ Upstream commit 933b5f9f98da29af646b51b36a0753692908ef64 ] Instead of printing an error message, kvm_stat script fails when we restrict statistics to a guest by its name and there are multiple guests with such name: # kvm_stat -g my_vm Traceback (most recent call last): File "/usr/bin/kvm_stat", line 1819, in main() File "/usr/bin/kvm_stat", line 1779, in main options = get_options() File "/usr/bin/kvm_stat", line 1718, in get_options options = argparser.parse_args() File "/usr/lib64/python3.10/argparse.py", line 1825, in parse_args args, argv = self.parse_known_args(args, namespace) File "/usr/lib64/python3.10/argparse.py", line 1858, in parse_known_args namespace, args = self._parse_known_args(args, namespace) File "/usr/lib64/python3.10/argparse.py", line 2067, in _parse_known_args start_index = consume_optional(start_index) File "/usr/lib64/python3.10/argparse.py", line 2007, in consume_optional take_action(action, args, option_string) File "/usr/lib64/python3.10/argparse.py", line 1935, in take_action action(self, namespace, argument_values, option_string) File "/usr/bin/kvm_stat", line 1649, in __call__ ' to specify the desired pid'.format(" ".join(pids))) TypeError: sequence item 0: expected str instance, int found To avoid this, it's needed to convert pids int values to strings before pass them to join(). Signed-off-by: Dmitry Klochkov Message-Id: <20220614121141.160689-1-kdmitry556@gmail.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- tools/kvm/kvm_stat/kvm_stat | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 5a5bd74f55bd50..9c366b3a676db8 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1646,7 +1646,8 @@ Press any other key to refresh statistics immediately. .format(values)) if len(pids) > 1: sys.exit('Error: Multiple processes found (pids: {}). Use "-p"' - ' to specify the desired pid'.format(" ".join(pids))) + ' to specify the desired pid' + .format(" ".join(map(str, pids)))) namespace.pid = pids[0] argparser = argparse.ArgumentParser(description=description_text, From e423893fe3209604236874e412dd3a9386578cff Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Wed, 15 Jun 2022 18:57:06 +0000 Subject: [PATCH 1039/1056] selftests: KVM: Handle compiler optimizations in ucall [ Upstream commit 9e2f6498efbbc880d7caa7935839e682b64fe5a6 ] The selftests, when built with newer versions of clang, is found to have over optimized guests' ucall() function, and eliminating the stores for uc.cmd (perhaps due to no immediate readers). This resulted in the userspace side always reading a value of '0', and causing multiple test failures. As a result, prevent the compiler from optimizing the stores in ucall() with WRITE_ONCE(). Suggested-by: Ricardo Koller Suggested-by: Reiji Watanabe Signed-off-by: Raghavendra Rao Ananta Message-Id: <20220615185706.1099208-1-rananta@google.com> Reviewed-by: Andrew Jones Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- tools/testing/selftests/kvm/lib/aarch64/ucall.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index e0b0164e9af853..be1d9728c4cea6 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -73,20 +73,19 @@ void ucall_uninit(struct kvm_vm *vm) void ucall(uint64_t cmd, int nargs, ...) { - struct ucall uc = { - .cmd = cmd, - }; + struct ucall uc = {}; va_list va; int i; + WRITE_ONCE(uc.cmd, cmd); nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; va_start(va, nargs); for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); + WRITE_ONCE(uc.args[i], va_arg(va, uint64_t)); va_end(va); - *ucall_exit_mmio_addr = (vm_vaddr_t)&uc; + WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc); } uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) From a619a03120991b679cd6a41355f6c9199f63fd8b Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Thu, 23 Jun 2022 17:18:58 +0000 Subject: [PATCH 1040/1056] KVM: x86/svm: add __GFP_ACCOUNT to __sev_dbg_{en,de}crypt_user() [ Upstream commit ebdec859faa8cfbfef9f6c1f83d79dd6c8f4ab8c ] Adding the accounting flag when allocating pages within the SEV function, since these memory pages should belong to individual VM. No functional change intended. Signed-off-by: Mingwei Zhang Message-Id: <20220623171858.2083637-1-mizhang@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/svm/sev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 4a4dc105552e39..86f3096f042f75 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -832,7 +832,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, /* If source buffer is not aligned then use an intermediate buffer */ if (!IS_ALIGNED((unsigned long)vaddr, 16)) { - src_tpage = alloc_page(GFP_KERNEL); + src_tpage = alloc_page(GFP_KERNEL_ACCOUNT); if (!src_tpage) return -ENOMEM; @@ -853,7 +853,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { int dst_offset; - dst_tpage = alloc_page(GFP_KERNEL); + dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT); if (!dst_tpage) { ret = -ENOMEM; goto e_free; From 775871d4be0d75e219cca937af843a4a1b60489a Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 8 Aug 2022 13:53:21 +0100 Subject: [PATCH 1041/1056] arm64: set UXN on swapper page tables [ This issue was fixed upstream by accident in c3cee924bd85 ("arm64: head: cover entire kernel image in initial ID map") as part of a large refactoring of the arm64 boot flow. This simple fix is therefore preferred for -stable backporting ] On a system that implements FEAT_EPAN, read/write access to the idmap is denied because UXN is not set on the swapper PTEs. As a result, idmap_kpti_install_ng_mappings panics the kernel when accessing __idmap_kpti_flag. Fix it by setting UXN on these PTEs. Fixes: 18107f8a2df6 ("arm64: Support execute-only permissions with Enhanced PAN") Cc: # 5.15 Link: https://linux-review.googlesource.com/id/Ic452fa4b4f74753e54f71e61027e7222a0fae1b1 Signed-off-by: Peter Collingbourne Acked-by: Will Deacon Cc: Ard Biesheuvel Cc: Catalin Marinas Link: https://lore.kernel.org/r/20220719234909.1398992-1-pcc@google.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kernel-pgtable.h | 4 ++-- arch/arm64/kernel/head.S | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 96dc0f7da258d9..a971d462f531c0 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -103,8 +103,8 @@ /* * Initial memory map attributes. */ -#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN) +#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PMD_SECT_UXN) #if ARM64_KERNEL_USES_PMD_MAPS #define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 17962452e31de6..ab6566bf1c3321 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -285,7 +285,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) subs x1, x1, #64 b.ne 1b - mov x7, SWAPPER_MM_MMUFLAGS + mov_q x7, SWAPPER_MM_MMUFLAGS /* * Create the identity mapping. From 5e04c8bf42d810986b609b989955347c2913b1be Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 7 Jun 2022 16:08:29 +0900 Subject: [PATCH 1042/1056] btrfs: zoned: prevent allocation from previous data relocation BG commit 343d8a30851c48a4ef0f5ef61d5e9fbd847a6883 upstream. After commit 5f0addf7b890 ("btrfs: zoned: use dedicated lock for data relocation"), we observe IO errors on e.g, btrfs/232 like below. [09.0][T4038707] WARNING: CPU: 3 PID: 4038707 at fs/btrfs/extent-tree.c:2381 btrfs_cross_ref_exist+0xfc/0x120 [btrfs] [09.9][T4038707] Call Trace: [09.5][T4038707] [09.3][T4038707] run_delalloc_nocow+0x7f1/0x11a0 [btrfs] [09.6][T4038707] ? test_range_bit+0x174/0x320 [btrfs] [09.2][T4038707] ? fallback_to_cow+0x980/0x980 [btrfs] [09.3][T4038707] ? find_lock_delalloc_range+0x33e/0x3e0 [btrfs] [09.5][T4038707] btrfs_run_delalloc_range+0x445/0x1320 [btrfs] [09.2][T4038707] ? test_range_bit+0x320/0x320 [btrfs] [09.4][T4038707] ? lock_downgrade+0x6a0/0x6a0 [09.2][T4038707] ? orc_find.part.0+0x1ed/0x300 [09.5][T4038707] ? __module_address.part.0+0x25/0x300 [09.0][T4038707] writepage_delalloc+0x159/0x310 [btrfs] [09.4][ C3] sd 10:0:1:0: [sde] tag#2620 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s [09.5][ C3] sd 10:0:1:0: [sde] tag#2620 Sense Key : Illegal Request [current] [09.9][ C3] sd 10:0:1:0: [sde] tag#2620 Add. Sense: Unaligned write command [09.5][ C3] sd 10:0:1:0: [sde] tag#2620 CDB: Write(16) 8a 00 00 00 00 00 02 f3 63 87 00 00 00 2c 00 00 [09.4][ C3] critical target error, dev sde, sector 396041272 op 0x1:(WRITE) flags 0x800 phys_seg 3 prio class 0 [09.9][ C3] BTRFS error (device dm-1): bdev /dev/mapper/dml_102_2 errs: wr 1, rd 0, flush 0, corrupt 0, gen 0 The IO errors occur when we allocate a regular extent in previous data relocation block group. On zoned btrfs, we use a dedicated block group to relocate a data extent. Thus, we allocate relocating data extents (pre-alloc) only from the dedicated block group and vice versa. Once the free space in the dedicated block group gets tight, a relocating extent may not fit into the block group. In that case, we need to switch the dedicated block group to the next one. Then, the previous one is now freed up for allocating a regular extent. The BG is already not enough to allocate the relocating extent, but there is still room to allocate a smaller extent. Now the problem happens. By allocating a regular extent while nocow IOs for the relocation is still on-going, we will issue WRITE IOs (for relocation) and ZONE APPEND IOs (for the regular writes) at the same time. That mixed IOs confuses the write pointer and arises the unaligned write errors. This commit introduces a new bit 'zoned_data_reloc_ongoing' to the btrfs_block_group. We set this bit before releasing the dedicated block group, and no extent are allocated from a block group having this bit set. This bit is similar to setting block_group->ro, but is different from it by allowing nocow writes to start. Once all the nocow IO for relocation is done (hooked from btrfs_finish_ordered_io), we reset the bit to release the block group for further allocation. Fixes: c2707a255623 ("btrfs: zoned: add a dedicated data relocation block group") CC: stable@vger.kernel.org # 5.16+ Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/block-group.h | 1 + fs/btrfs/extent-tree.c | 20 ++++++++++++++++++-- fs/btrfs/inode.c | 2 ++ fs/btrfs/zoned.c | 27 +++++++++++++++++++++++++++ fs/btrfs/zoned.h | 5 +++++ 5 files changed, 53 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 37e55ebde735bc..d73db0dfacb26a 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -98,6 +98,7 @@ struct btrfs_block_group { unsigned int to_copy:1; unsigned int relocating_repair:1; unsigned int chunk_item_inserted:1; + unsigned int zoned_data_reloc_ongoing:1; int disk_cache_state; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e3514f9a4e8ddb..248ea15c973469 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3804,7 +3804,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, block_group->start == fs_info->data_reloc_bg || fs_info->data_reloc_bg == 0); - if (block_group->ro) { + if (block_group->ro || block_group->zoned_data_reloc_ongoing) { ret = 1; goto out; } @@ -3865,8 +3865,24 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, out: if (ret && ffe_ctl->for_treelog) fs_info->treelog_bg = 0; - if (ret && ffe_ctl->for_data_reloc) + if (ret && ffe_ctl->for_data_reloc && + fs_info->data_reloc_bg == block_group->start) { + /* + * Do not allow further allocations from this block group. + * Compared to increasing the ->ro, setting the + * ->zoned_data_reloc_ongoing flag still allows nocow + * writers to come in. See btrfs_inc_nocow_writers(). + * + * We need to disable an allocation to avoid an allocation of + * regular (non-relocation data) extent. With mix of relocation + * extents and regular extents, we can dispatch WRITE commands + * (for relocation extents) and ZONE APPEND commands (for + * regular extents) at the same time to the same zone, which + * easily break the write pointer. + */ + block_group->zoned_data_reloc_ongoing = 1; fs_info->data_reloc_bg = 0; + } spin_unlock(&fs_info->relocation_bg_lock); spin_unlock(&fs_info->treelog_bg_lock); spin_unlock(&block_group->lock); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ea726205079015..1b4fee8a2f28b6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3069,6 +3069,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) ordered_extent->file_offset, ordered_extent->file_offset + logical_len); + btrfs_zoned_release_data_reloc_bg(fs_info, ordered_extent->disk_bytenr, + ordered_extent->disk_num_bytes); } else { BUG_ON(root == fs_info->tree_root); ret = insert_ordered_extent_file_extent(trans, ordered_extent); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 574769f921a22b..fc791f7c71428f 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1623,3 +1623,30 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) } mutex_unlock(&fs_devices->device_list_mutex); } + +void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical, + u64 length) +{ + struct btrfs_block_group *block_group; + + if (!btrfs_is_zoned(fs_info)) + return; + + block_group = btrfs_lookup_block_group(fs_info, logical); + /* It should be called on a previous data relocation block group. */ + ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)); + + spin_lock(&block_group->lock); + if (!block_group->zoned_data_reloc_ongoing) + goto out; + + /* All relocation extents are written. */ + if (block_group->start + block_group->alloc_offset == logical + length) { + /* Now, release this block group for further allocations. */ + block_group->zoned_data_reloc_ongoing = 0; + } + +out: + spin_unlock(&block_group->lock); + btrfs_put_block_group(block_group); +} diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index 3a826f7c204030..574490ea2cc87f 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -70,6 +70,8 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, u64 logical, u64 length); void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg); void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info); +void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical, + u64 length); #else /* CONFIG_BLK_DEV_ZONED */ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone) @@ -207,6 +209,9 @@ static inline struct btrfs_device *btrfs_zoned_get_device( static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { } static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { } + +static inline void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, + u64 logical, u64 length) { } #endif static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos) From 37b385c78cd57edcc9c2350a37763012c28982cd Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 7 Jun 2022 16:08:30 +0900 Subject: [PATCH 1043/1056] btrfs: zoned: fix critical section of relocation inode writeback commit 19ab78ca86981e0e1e73036fb73a508731a7c078 upstream. We use btrfs_zoned_data_reloc_{lock,unlock} to allow only one process to write out to the relocation inode. That critical section must include all the IO submission for the inode. However, flush_write_bio() in extent_writepages() is out of the critical section, causing an IO submission outside of the lock. This leads to an out of the order IO submission and fail the relocation process. Fix it by extending the critical section. Fixes: 35156d852762 ("btrfs: zoned: only allow one process to add pages to a relocation inode") CC: stable@vger.kernel.org # 5.16+ Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b791e280af0ca0..a90546b3107c50 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5152,13 +5152,14 @@ int extent_writepages(struct address_space *mapping, */ btrfs_zoned_data_reloc_lock(BTRFS_I(inode)); ret = extent_write_cache_pages(mapping, wbc, &epd); - btrfs_zoned_data_reloc_unlock(BTRFS_I(inode)); ASSERT(ret <= 0); if (ret < 0) { + btrfs_zoned_data_reloc_unlock(BTRFS_I(inode)); end_write_bio(&epd, ret); return ret; } ret = flush_write_bio(&epd); + btrfs_zoned_data_reloc_unlock(BTRFS_I(inode)); return ret; } From 2aa38f0af306230ae4fb15b73b4c18f9209758cb Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Tue, 24 May 2022 07:56:41 +0200 Subject: [PATCH 1044/1056] Bluetooth: hci_bcm: Add BCM4349B1 variant commit 4f17c2b6694d0c4098f33b07ee3a696976940aa5 upstream. The BCM4349B1, aka CYW/BCM89359, is a WiFi+BT chip and its Bluetooth portion can be controlled over serial. Two subversions are added for the chip, because ROM firmware reports 002.002.013 (at least for the chips I have here), while depending on patchram firmware revision, either 002.002.013 or 002.002.014 is reported. Signed-off-by: Ahmad Fatoum Reviewed-by: Linus Walleij Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btbcm.c | 2 ++ drivers/bluetooth/hci_bcm.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index d9ceca7a7935c5..a18f289d73466d 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -453,6 +453,8 @@ static const struct bcm_subver_table bcm_uart_subver_table[] = { { 0x6606, "BCM4345C5" }, /* 003.006.006 */ { 0x230f, "BCM4356A2" }, /* 001.003.015 */ { 0x220e, "BCM20702A1" }, /* 001.002.014 */ + { 0x420d, "BCM4349B1" }, /* 002.002.013 */ + { 0x420e, "BCM4349B1" }, /* 002.002.014 */ { 0x4217, "BCM4329B1" }, /* 002.002.023 */ { 0x6106, "BCM4359C0" }, /* 003.001.006 */ { 0x4106, "BCM4335A0" }, /* 002.001.006 */ diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 7abf99f0ee3997..b47a31bd2e9bab 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -1515,6 +1515,7 @@ static const struct of_device_id bcm_bluetooth_of_match[] = { { .compatible = "brcm,bcm4345c5" }, { .compatible = "brcm,bcm4330-bt" }, { .compatible = "brcm,bcm43438-bt", .data = &bcm43438_device_data }, + { .compatible = "brcm,bcm4349-bt", .data = &bcm43438_device_data }, { .compatible = "brcm,bcm43540-bt", .data = &bcm4354_device_data }, { .compatible = "brcm,bcm4335a0" }, { }, From 88e088e29487913c302b7480b866266f8971427f Mon Sep 17 00:00:00 2001 From: Hakan Jansson Date: Thu, 30 Jun 2022 14:45:22 +0200 Subject: [PATCH 1045/1056] Bluetooth: hci_bcm: Add DT compatible for CYW55572 commit f8cad62002a7699fd05a23b558b980b5a77defe0 upstream. CYW55572 is a Wi-Fi + Bluetooth combo device from Infineon. Signed-off-by: Hakan Jansson Reviewed-by: Linus Walleij Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_bcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index b47a31bd2e9bab..cf622e45960558 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -1518,6 +1518,7 @@ static const struct of_device_id bcm_bluetooth_of_match[] = { { .compatible = "brcm,bcm4349-bt", .data = &bcm43438_device_data }, { .compatible = "brcm,bcm43540-bt", .data = &bcm4354_device_data }, { .compatible = "brcm,bcm4335a0" }, + { .compatible = "infineon,cyw55572-bt" }, { }, }; MODULE_DEVICE_TABLE(of, bcm_bluetooth_of_match); From 6a5ec48fb7528aea20805534a867d0cc631994f8 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Tue, 24 May 2022 07:56:40 +0200 Subject: [PATCH 1046/1056] dt-bindings: bluetooth: broadcom: Add BCM4349B1 DT binding commit 88b65887aa1b76cd8649a97824fb9904c1d79254 upstream. The BCM4349B1, aka CYW/BCM89359, is a WiFi+BT chip and its Bluetooth portion can be controlled over serial. Extend the binding with its DT compatible. Acked-by: Krzysztof Kozlowski Reviewed-by: Linus Walleij Signed-off-by: Ahmad Fatoum Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml index fbdc2083bec4f2..20ee96584aba24 100644 --- a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml +++ b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml @@ -23,6 +23,7 @@ properties: - brcm,bcm4345c5 - brcm,bcm43540-bt - brcm,bcm4335a0 + - brcm,bcm4349-bt shutdown-gpios: maxItems: 1 From 04e3388eeb4709a8f079f92a48e8e034cc7e5e50 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Thu, 2 Jun 2022 17:28:22 +0800 Subject: [PATCH 1047/1056] Bluetooth: btusb: Add support of IMC Networks PID 0x3568 commit c69ecb0ea4c96b8b191cbaa0b420222a37867655 upstream. It is 13d3:3568 for MediaTek MT7922 USB Bluetooth chip. T: Bus=03 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=13d3 ProdID=3568 Rev=01.00 S: Manufacturer=MediaTek Inc. S: Product=Wireless_Device S: SerialNumber=... C: #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=125us E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 2 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none) E: Ad=0a(O) Atr=03(Int.) MxPS= 64 Ivl=125us E: Ad=8a(I) Atr=03(Int.) MxPS= 64 Ivl=125us Signed-off-by: Aaron Ma Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index a68edbc7be0ff2..26ff826b5f1688 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -459,6 +459,9 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe0d9), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, + { USB_DEVICE(0x13d3, 0x3568), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES }, /* Additional Realtek 8723AE Bluetooth devices */ { USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK }, From d4f921efb4bf6b75375ed459be785b0c6415daec Mon Sep 17 00:00:00 2001 From: Hilda Wu Date: Thu, 14 Jul 2022 19:25:19 +0800 Subject: [PATCH 1048/1056] Bluetooth: btusb: Add Realtek RTL8852C support ID 0x04CA:0x4007 commit c379c96cc221767af9688a5d4758a78eea30883a upstream. Add the support ID(0x04CA, 0x4007) to usb_device_id table for Realtek RTL8852C. The device info from /sys/kernel/debug/usb/devices as below. T: Bus=03 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 1.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=04ca ProdID=4007 Rev= 0.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Hilda Wu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 26ff826b5f1688..1196d1c37d37d7 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -420,6 +420,10 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x04ca, 0x4006), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + /* Realtek 8852CE Bluetooth devices */ + { USB_DEVICE(0x04ca, 0x4007), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, + /* Realtek Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01), .driver_info = BTUSB_REALTEK }, From b653eeaa8cf8d909a52410069a9ccdcb545bc9bb Mon Sep 17 00:00:00 2001 From: Hilda Wu Date: Thu, 14 Jul 2022 19:25:20 +0800 Subject: [PATCH 1049/1056] Bluetooth: btusb: Add Realtek RTL8852C support ID 0x04C5:0x1675 commit 893fa8bc9952a36fb682ee12f0a994b5817a36d2 upstream. Add the support ID(0x04c5, 0x1675) to usb_device_id table for Realtek RTL8852C. The device info from /sys/kernel/debug/usb/devices as below. T: Bus=03 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 1.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=04c5 ProdID=1675 Rev= 0.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Hilda Wu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 1196d1c37d37d7..250612c24dd1ab 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -423,6 +423,8 @@ static const struct usb_device_id blacklist_table[] = { /* Realtek 8852CE Bluetooth devices */ { USB_DEVICE(0x04ca, 0x4007), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x04c5, 0x1675), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, /* Realtek Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01), From 59689a843bc9bbc42046084998da7efe1fbc331f Mon Sep 17 00:00:00 2001 From: Hilda Wu Date: Thu, 14 Jul 2022 19:25:21 +0800 Subject: [PATCH 1050/1056] Bluetooth: btusb: Add Realtek RTL8852C support ID 0x0CB8:0xC558 commit 5b75ee37ebb73f58468d4cca172434324af203f1 upstream. Add the support ID(0x0CB8, 0xC558) to usb_device_id table for Realtek RTL8852C. The device info from /sys/kernel/debug/usb/devices as below. T: Bus=03 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 1.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cb8 ProdID=c558 Rev= 0.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Hilda Wu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 250612c24dd1ab..ac48f74a65a444 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -425,6 +425,8 @@ static const struct usb_device_id blacklist_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x04c5, 0x1675), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0cb8, 0xc558), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, /* Realtek Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01), From ee421ad8973b5abb8f3948b4bfab1810b420717f Mon Sep 17 00:00:00 2001 From: Hilda Wu Date: Thu, 14 Jul 2022 19:25:22 +0800 Subject: [PATCH 1051/1056] Bluetooth: btusb: Add Realtek RTL8852C support ID 0x13D3:0x3587 commit 8f0054dd29373cd877db87751c143610561d549d upstream. Add the support ID(0x13D3, 0x3587) to usb_device_id table for Realtek RTL8852C. The device info from /sys/kernel/debug/usb/devices as below. T: Bus=03 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 1.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=13d3 ProdID=3587 Rev= 0.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Hilda Wu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index ac48f74a65a444..85233179b96082 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -427,6 +427,8 @@ static const struct usb_device_id blacklist_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0cb8, 0xc558), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x13d3, 0x3587), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, /* Realtek Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01), From d98cf2b40c2059eefa757fb1ab2607e5483caa05 Mon Sep 17 00:00:00 2001 From: Hilda Wu Date: Thu, 14 Jul 2022 19:25:23 +0800 Subject: [PATCH 1052/1056] Bluetooth: btusb: Add Realtek RTL8852C support ID 0x13D3:0x3586 commit 6ad353dfc8ee3230a5e123c21da50f1b64cc4b39 upstream. Add the support ID(0x13D3, 0x3586) to usb_device_id table for Realtek RTL8852C. The device info from /sys/kernel/debug/usb/devices as below. T: Bus=03 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 1.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=13d3 ProdID=3586 Rev= 0.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Hilda Wu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 85233179b96082..627436329b5023 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -429,6 +429,8 @@ static const struct usb_device_id blacklist_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3587), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x13d3, 0x3586), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, /* Realtek Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01), From c81d1bb58c88ab831e02539a26ad47860e8d6f0c Mon Sep 17 00:00:00 2001 From: Ning Qiang Date: Wed, 13 Jul 2022 23:37:34 +0800 Subject: [PATCH 1053/1056] macintosh/adb: fix oob read in do_adb_query() function commit fd97e4ad6d3b0c9fce3bca8ea8e6969d9ce7423b upstream. In do_adb_query() function of drivers/macintosh/adb.c, req->data is copied form userland. The parameter "req->data[2]" is missing check, the array size of adb_handler[] is 16, so adb_handler[req->data[2]].original_address and adb_handler[req->data[2]].handler_id will lead to oob read. Cc: stable Signed-off-by: Ning Qiang Reviewed-by: Kees Cook Reviewed-by: Greg Kroah-Hartman Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220713153734.2248-1-sohu0106@126.com Signed-off-by: Greg Kroah-Hartman --- drivers/macintosh/adb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 73b39618903977..afb0942ccc2934 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -647,7 +647,7 @@ do_adb_query(struct adb_request *req) switch(req->data[1]) { case ADB_QUERY_GETDEVINFO: - if (req->nbytes < 3) + if (req->nbytes < 3 || req->data[2] >= 16) break; mutex_lock(&adb_handler_mutex); req->reply[0] = adb_handler[req->data[2]].original_address; From 7fcd99e889c0634f8275ae7a6b06aec4a22c8715 Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Tue, 2 Aug 2022 15:47:01 -0700 Subject: [PATCH 1054/1056] x86/speculation: Add RSB VM Exit protections commit 2b1299322016731d56807aa49254a5ea3080b6b3 upstream. tl;dr: The Enhanced IBRS mitigation for Spectre v2 does not work as documented for RET instructions after VM exits. Mitigate it with a new one-entry RSB stuffing mechanism and a new LFENCE. == Background == Indirect Branch Restricted Speculation (IBRS) was designed to help mitigate Branch Target Injection and Speculative Store Bypass, i.e. Spectre, attacks. IBRS prevents software run in less privileged modes from affecting branch prediction in more privileged modes. IBRS requires the MSR to be written on every privilege level change. To overcome some of the performance issues of IBRS, Enhanced IBRS was introduced. eIBRS is an "always on" IBRS, in other words, just turn it on once instead of writing the MSR on every privilege level change. When eIBRS is enabled, more privileged modes should be protected from less privileged modes, including protecting VMMs from guests. == Problem == Here's a simplification of how guests are run on Linux' KVM: void run_kvm_guest(void) { // Prepare to run guest VMRESUME(); // Clean up after guest runs } The execution flow for that would look something like this to the processor: 1. Host-side: call run_kvm_guest() 2. Host-side: VMRESUME 3. Guest runs, does "CALL guest_function" 4. VM exit, host runs again 5. Host might make some "cleanup" function calls 6. Host-side: RET from run_kvm_guest() Now, when back on the host, there are a couple of possible scenarios of post-guest activity the host needs to do before executing host code: * on pre-eIBRS hardware (legacy IBRS, or nothing at all), the RSB is not touched and Linux has to do a 32-entry stuffing. * on eIBRS hardware, VM exit with IBRS enabled, or restoring the host IBRS=1 shortly after VM exit, has a documented side effect of flushing the RSB except in this PBRSB situation where the software needs to stuff the last RSB entry "by hand". IOW, with eIBRS supported, host RET instructions should no longer be influenced by guest behavior after the host retires a single CALL instruction. However, if the RET instructions are "unbalanced" with CALLs after a VM exit as is the RET in #6, it might speculatively use the address for the instruction after the CALL in #3 as an RSB prediction. This is a problem since the (untrusted) guest controls this address. Balanced CALL/RET instruction pairs such as in step #5 are not affected. == Solution == The PBRSB issue affects a wide variety of Intel processors which support eIBRS. But not all of them need mitigation. Today, X86_FEATURE_RSB_VMEXIT triggers an RSB filling sequence that mitigates PBRSB. Systems setting RSB_VMEXIT need no further mitigation - i.e., eIBRS systems which enable legacy IBRS explicitly. However, such systems (X86_FEATURE_IBRS_ENHANCED) do not set RSB_VMEXIT and most of them need a new mitigation. Therefore, introduce a new feature flag X86_FEATURE_RSB_VMEXIT_LITE which triggers a lighter-weight PBRSB mitigation versus RSB_VMEXIT. The lighter-weight mitigation performs a CALL instruction which is immediately followed by a speculative execution barrier (INT3). This steers speculative execution to the barrier -- just like a retpoline -- which ensures that speculation can never reach an unbalanced RET. Then, ensure this CALL is retired before continuing execution with an LFENCE. In other words, the window of exposure is opened at VM exit where RET behavior is troublesome. While the window is open, force RSB predictions sampling for RET targets to a dead end at the INT3. Close the window with the LFENCE. There is a subset of eIBRS systems which are not vulnerable to PBRSB. Add these systems to the cpu_vuln_whitelist[] as NO_EIBRS_PBRSB. Future systems that aren't vulnerable will set ARCH_CAP_PBRSB_NO. [ bp: Massage, incorporate review comments from Andy Cooper. ] Signed-off-by: Daniel Sneddon Co-developed-by: Pawan Gupta Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 8 ++ arch/x86/include/asm/cpufeatures.h | 2 + arch/x86/include/asm/msr-index.h | 4 + arch/x86/include/asm/nospec-branch.h | 17 +++- arch/x86/kernel/cpu/bugs.c | 86 ++++++++++++++----- arch/x86/kernel/cpu/common.c | 12 ++- arch/x86/kvm/vmx/vmenter.S | 8 +- tools/arch/x86/include/asm/cpufeatures.h | 1 + tools/arch/x86/include/asm/msr-index.h | 4 + 9 files changed, 113 insertions(+), 29 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 6bd97cd50d6256..7e061ed449aaab 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -422,6 +422,14 @@ The possible values in this file are: 'RSB filling' Protection of RSB on context switch enabled ============= =========================================== + - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status: + + =========================== ======================================================= + 'PBRSB-eIBRS: SW sequence' CPU is affected and protection of RSB on VMEXIT enabled + 'PBRSB-eIBRS: Vulnerable' CPU is vulnerable + 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB + =========================== ======================================================= + Full mitigation might require a microcode update from the CPU vendor. When the necessary microcode is not available, the kernel will report vulnerability. diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index d370718e222bab..be744fa1000485 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -301,6 +301,7 @@ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -446,5 +447,6 @@ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ #define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ec2967e7249f51..8f38265bc81dc7 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -148,6 +148,10 @@ * are restricted to targets in * kernel. */ +#define ARCH_CAP_PBRSB_NO BIT(24) /* + * Not susceptible to Post-Barrier + * Return Stack Buffer Predictions. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 9a79b96e55214f..96597246f7931b 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -118,13 +118,28 @@ #endif .endm +.macro ISSUE_UNBALANCED_RET_GUARD + ANNOTATE_INTRA_FUNCTION_CALL + call .Lunbalanced_ret_guard_\@ + int3 +.Lunbalanced_ret_guard_\@: + add $(BITS_PER_LONG/8), %_ASM_SP + lfence +.endm + /* * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP * monstrosity above, manually. */ -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2 +.ifb \ftr2 ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr +.else + ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2 +.endif __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) +.Lunbalanced_\@: + ISSUE_UNBALANCED_RET_GUARD .Lskip_rsb_\@: .endm diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a37814c8547e41..837e617f3b76d0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1328,6 +1328,53 @@ static void __init spec_ctrl_disable_kernel_rrsba(void) } } +static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) +{ + /* + * Similar to context switches, there are two types of RSB attacks + * after VM exit: + * + * 1) RSB underflow + * + * 2) Poisoned RSB entry + * + * When retpoline is enabled, both are mitigated by filling/clearing + * the RSB. + * + * When IBRS is enabled, while #1 would be mitigated by the IBRS branch + * prediction isolation protections, RSB still needs to be cleared + * because of #2. Note that SMEP provides no protection here, unlike + * user-space-poisoned RSB entries. + * + * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB + * bug is present then a LITE version of RSB protection is required, + * just a single call needs to retire before a RET is executed. + */ + switch (mode) { + case SPECTRE_V2_NONE: + return; + + case SPECTRE_V2_EIBRS_LFENCE: + case SPECTRE_V2_EIBRS: + if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); + pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n"); + } + return; + + case SPECTRE_V2_EIBRS_RETPOLINE: + case SPECTRE_V2_RETPOLINE: + case SPECTRE_V2_LFENCE: + case SPECTRE_V2_IBRS: + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); + pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n"); + return; + } + + pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit"); + dump_stack(); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -1478,28 +1525,7 @@ static void __init spectre_v2_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); - /* - * Similar to context switches, there are two types of RSB attacks - * after vmexit: - * - * 1) RSB underflow - * - * 2) Poisoned RSB entry - * - * When retpoline is enabled, both are mitigated by filling/clearing - * the RSB. - * - * When IBRS is enabled, while #1 would be mitigated by the IBRS branch - * prediction isolation protections, RSB still needs to be cleared - * because of #2. Note that SMEP provides no protection here, unlike - * user-space-poisoned RSB entries. - * - * eIBRS, on the other hand, has RSB-poisoning protections, so it - * doesn't need RSB clearing after vmexit. - */ - if (boot_cpu_has(X86_FEATURE_RETPOLINE) || - boot_cpu_has(X86_FEATURE_KERNEL_IBRS)) - setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); + spectre_v2_determine_rsb_fill_type_at_vmexit(mode); /* * Retpoline protects the kernel, but doesn't protect firmware. IBRS @@ -2285,6 +2311,19 @@ static char *ibpb_state(void) return ""; } +static char *pbrsb_eibrs_state(void) +{ + if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { + if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || + boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) + return ", PBRSB-eIBRS: SW sequence"; + else + return ", PBRSB-eIBRS: Vulnerable"; + } else { + return ", PBRSB-eIBRS: Not affected"; + } +} + static ssize_t spectre_v2_show_state(char *buf) { if (spectre_v2_enabled == SPECTRE_V2_LFENCE) @@ -2297,12 +2336,13 @@ static ssize_t spectre_v2_show_state(char *buf) spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); - return sprintf(buf, "%s%s%s%s%s%s\n", + return sprintf(buf, "%s%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ibpb_state(), boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", stibp_state(), boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + pbrsb_eibrs_state(), spectre_v2_module_string()); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80cc41f7978309..4a538ec413b8b6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1027,6 +1027,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SWAPGS BIT(6) #define NO_ITLB_MULTIHIT BIT(7) #define NO_SPECTRE_V2 BIT(8) +#define NO_EIBRS_PBRSB BIT(9) #define VULNWL(vendor, family, model, whitelist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) @@ -1067,7 +1068,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* * Technically, swapgs isn't serializing on AMD (despite it previously @@ -1077,7 +1078,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { * good enough for our purposes. */ - VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_TREMONT, NO_EIBRS_PBRSB), + VULNWL_INTEL(ATOM_TREMONT_L, NO_EIBRS_PBRSB), + VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), @@ -1255,6 +1258,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_RETBLEED); } + if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) && + !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && + !(ia32_cap & ARCH_CAP_PBRSB_NO)) + setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 857fa0fc49fafd..982138bebb70f3 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -197,11 +197,13 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) * entries and (in some cases) RSB underflow. * * eIBRS has its own protection against poisoned RSB, so it doesn't - * need the RSB filling sequence. But it does need to be enabled - * before the first unbalanced RET. + * need the RSB filling sequence. But it does need to be enabled, and a + * single call to retire, before the first unbalanced RET. */ - FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT + FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\ + X86_FEATURE_RSB_VMEXIT_LITE + pop %_ASM_ARG2 /* @flags */ pop %_ASM_ARG1 /* @vmx */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 3781a7f489ef37..bcaedfe60572f8 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -300,6 +300,7 @@ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM-Exit when EIBRS is enabled */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index ec2967e7249f51..8f38265bc81dc7 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -148,6 +148,10 @@ * are restricted to targets in * kernel. */ +#define ARCH_CAP_PBRSB_NO BIT(24) /* + * Not susceptible to Post-Barrier + * Return Stack Buffer Predictions. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* From 5c5c77746ce1108833d1fda005598a749eaef2cb Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 2 Aug 2022 15:47:02 -0700 Subject: [PATCH 1055/1056] x86/speculation: Add LFENCE to RSB fill sequence commit ba6e31af2be96c4d0536f2152ed6f7b6c11bca47 upstream. RSB fill sequence does not have any protection for miss-prediction of conditional branch at the end of the sequence. CPU can speculatively execute code immediately after the sequence, while RSB filling hasn't completed yet. #define __FILL_RETURN_BUFFER(reg, nr, sp) \ mov $(nr/2), reg; \ 771: \ ANNOTATE_INTRA_FUNCTION_CALL; \ call 772f; \ 773: /* speculation trap */ \ UNWIND_HINT_EMPTY; \ pause; \ lfence; \ jmp 773b; \ 772: \ ANNOTATE_INTRA_FUNCTION_CALL; \ call 774f; \ 775: /* speculation trap */ \ UNWIND_HINT_EMPTY; \ pause; \ lfence; \ jmp 775b; \ 774: \ add $(BITS_PER_LONG/8) * 2, sp; \ dec reg; \ jnz 771b; <----- CPU can miss-predict here. Before RSB is filled, RETs that come in program order after this macro can be executed speculatively, making them vulnerable to RSB-based attacks. Mitigate it by adding an LFENCE after the conditional branch to prevent speculation while RSB is being filled. Suggested-by: Andrew Cooper Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 96597246f7931b..6a59b2d58a3a9c 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -60,7 +60,9 @@ 774: \ add $(BITS_PER_LONG/8) * 2, sp; \ dec reg; \ - jnz 771b; + jnz 771b; \ + /* barrier for jnz misprediction */ \ + lfence; #ifdef __ASSEMBLY__ From 7217df81279835a7aee62a07aabb7b8fb8c766f2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 11 Aug 2022 13:07:54 +0200 Subject: [PATCH 1056/1056] Linux 5.15.60 Link: https://lore.kernel.org/r/20220809175514.276643253@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Bagas Sanjaya Tested-by: Linux Kernel Functional Testing Tested-by: Sudip Mukherjee Tested-by: Guenter Roeck Tested-by: Jon Hunter Tested-by: Shuah Khan Tested-by: Ron Economos Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 22bca3948306b4..4ea646f496c9c2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 59 +SUBLEVEL = 60 EXTRAVERSION = NAME = Trick or Treat