Merge tag 'drm-fixes-2022-10-21' of git://anongit.freedesktop.org/drm/drm

Pull drm fixes from Dave Airlie:
 "Usual fixes for the week.

  The amdgpu update contains fixes for two regressions: one reported
  against rc1 that broke SI GPUs, and one gfx9 APU regression.

  Otherwise it's mostly fixes for new IP, and some GPU reset fixes. vc4
  is just HDMI fixes, and panfrost has some minor type fixes.

  Core:
   - fix connector DDC pointer
   - fix buffer overflow in format_helper_test

  amdgpu:
   - Mode2 reset fixes for Sienna Cichlid
   - Revert broken fan speed sensor fix
   - SMU 13.x fixes
   - GC 11.x fixes
   - RAS fixes
   - SR-IOV fixes
   - Fix BO move breakage on SI
   - Misc compiler fixes
   - Fix gfx9 APU regression caused by PCI AER fix

  vc4:
   - HDMI fixes

  panfrost:
   - compiler fixes"

* tag 'drm-fixes-2022-10-21' of git://anongit.freedesktop.org/drm/drm: (35 commits)
  drm/amdgpu: fix sdma doorbell init ordering on APUs
  drm/panfrost: replace endian-specific types with native ones
  drm/panfrost: Remove type name from internal structs
  drm/connector: Set DDC pointer in drmm_connector_init
  drm: tests: Fix a buffer overflow in format_helper_test
  drm/amdgpu: use DRM_SCHED_FENCE_DONT_PIPELINE for VM updates
  drm/sched: add DRM_SCHED_FENCE_DONT_PIPELINE flag
  drm/amdgpu: Fix for BO move issue
  drm/amdgpu: dequeue mes scheduler during fini
  drm/amd/pm: enable thermal alert on smu_v13_0_10
  drm/amdgpu: Program GC registers through RLCG interface in gfx_v11/gmc_v11
  drm/amdkfd: Fix type of reset_type parameter in hqd_destroy() callback
  drm/amd/display: Increase frame size limit for display_mode_vba_util_32.o
  drm/amd/pm: add SMU IP v13.0.4 IF version define to V7
  drm/amd/pm: update SMU IP v13.0.4 driver interface version
  drm/amd/pm: Init pm_attr_list when dpm is disabled
  drm/amd/pm: disable cstate feature for gpu reset scenario
  drm/amd/pm: fulfill SMU13.0.7 cstate control interface
  drm/amd/pm: fulfill SMU13.0.0 cstate control interface
  drm/amdgpu: Add sriov vf ras support in amdgpu_ras_asic_supported
  ...
torvalds committed Oct 21, 2022
2 parents 6d36c72 + cbc543c commit e35184f
Showing 45 changed files with 314 additions and 134 deletions.
4 changes: 0 additions & 4 deletions drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -274,9 +274,6 @@ extern int amdgpu_vcnfw_log;
#define AMDGPU_RESET_VCE (1 << 13)
#define AMDGPU_RESET_VCE1 (1 << 14)

#define AMDGPU_RESET_LEVEL_SOFT_RECOVERY (1 << 0)
#define AMDGPU_RESET_LEVEL_MODE2 (1 << 1)

/* max cursor sizes (in pixels) */
#define CIK_CURSOR_WIDTH 128
#define CIK_CURSOR_HEIGHT 128
@@ -1065,7 +1062,6 @@ struct amdgpu_device {

struct work_struct reset_work;

uint32_t amdgpu_reset_level_mask;
bool job_hang;
};

1 change: 0 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -134,7 +134,6 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);

amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
@@ -111,7 +111,7 @@ static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id)

lock_srbm(adev, mec, pipe, 0, 0);

WREG32(SOC15_REG_OFFSET(GC, 0, regCPC_INT_CNTL),
WREG32_SOC15(GC, 0, regCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

2 changes: 0 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1954,8 +1954,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
return PTR_ERR(ent);
}

debugfs_create_u32("amdgpu_reset_level", 0600, root, &adev->amdgpu_reset_level_mask);

/* Register debugfs entries for amdgpu_ttm */
amdgpu_ttm_debugfs_init(adev);
amdgpu_debugfs_pm_init(adev);
15 changes: 9 additions & 6 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2928,6 +2928,14 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

/*
* Per PMFW team's suggestion, driver needs to handle gfxoff
* and df cstate features disablement for gpu reset(e.g. Mode1Reset)
* scenario. Add the missing df cstate disablement here.
*/
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
dev_warn(adev->dev, "Failed to disallow df cstate");

for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
continue;
@@ -5210,7 +5218,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,

reset_context->job = job;
reset_context->hive = hive;

/*
* Build list of devices to reset.
* In case we are in XGMI hive mode, resort the device list
@@ -5337,11 +5344,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
amdgpu_ras_resume(adev);
} else {
r = amdgpu_do_asic_reset(device_list_handle, reset_context);
if (r && r == -EAGAIN) {
set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags);
adev->asic_reset_res = 0;
if (r && r == -EAGAIN)
goto retry;
}

if (!r && gpu_reset_for_dev_remove)
goto recover_end;
@@ -5777,7 +5781,6 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
reset_context.reset_req_dev = adev;
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);

adev->no_hw_access = true;
r = amdgpu_device_pre_asic_reset(adev, &reset_context);
1 change: 0 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -72,7 +72,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);

r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
if (r)
25 changes: 19 additions & 6 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1950,7 +1950,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);

amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
}
@@ -2268,6 +2267,25 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)

static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev)) {
switch (adev->ip_versions[MP0_HWIP][0]) {
case IP_VERSION(13, 0, 2):
return true;
default:
return false;
}
}

if (adev->asic_type == CHIP_IP_DISCOVERY) {
switch (adev->ip_versions[MP0_HWIP][0]) {
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 10):
return true;
default:
return false;
}
}

return adev->asic_type == CHIP_VEGA10 ||
adev->asic_type == CHIP_VEGA20 ||
adev->asic_type == CHIP_ARCTURUS ||
@@ -2311,11 +2329,6 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
!amdgpu_ras_asic_supported(adev))
return;

/* If driver run on sriov guest side, only enable ras for aldebaran */
if (amdgpu_sriov_vf(adev) &&
adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 2))
return;

if (!adev->gmc.xgmi.connected_to_cpu) {
if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
dev_info(adev->dev, "MEM ECC is active.\n");
14 changes: 0 additions & 14 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -37,8 +37,6 @@ int amdgpu_reset_init(struct amdgpu_device *adev)
{
int ret = 0;

adev->amdgpu_reset_level_mask = 0x1;

switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 2):
ret = aldebaran_reset_init(adev);
@@ -76,12 +74,6 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
{
struct amdgpu_reset_handler *reset_handler = NULL;

if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2))
return -ENOSYS;

if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
return -ENOSYS;

if (adev->reset_cntl && adev->reset_cntl->get_reset_handler)
reset_handler = adev->reset_cntl->get_reset_handler(
adev->reset_cntl, reset_context);
@@ -98,12 +90,6 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
int ret;
struct amdgpu_reset_handler *reset_handler = NULL;

if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2))
return -ENOSYS;

if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
return -ENOSYS;

if (adev->reset_cntl)
reset_handler = adev->reset_cntl->get_reset_handler(
adev->reset_cntl, reset_context);
3 changes: 1 addition & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -30,8 +30,7 @@ enum AMDGPU_RESET_FLAGS {

AMDGPU_NEED_FULL_RESET = 0,
AMDGPU_SKIP_HW_RESET = 1,
AMDGPU_SKIP_MODE2_RESET = 2,
AMDGPU_RESET_FOR_DEVICE_REMOVE = 3,
AMDGPU_RESET_FOR_DEVICE_REMOVE = 2,
};

struct amdgpu_reset_context {
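The values in AMDGPU_RESET_FLAGS above are bit positions consumed by set_bit()/clear_bit()/test_bit() on reset_context.flags (as the amdgpu_device.c and amdgpu_job.c hunks show), unlike the pre-shifted AMDGPU_RESET_LEVEL_* masks removed from amdgpu.h earlier in this diff; dropping AMDGPU_SKIP_MODE2_RESET is what lets AMDGPU_RESET_FOR_DEVICE_REMOVE move from 3 to 2. A minimal standalone sketch of that bit-index idiom, with simple helpers standing in for the kernel's bitops:

#include <stdio.h>

enum reset_flags {                  /* bit positions, mirroring AMDGPU_RESET_FLAGS */
        NEED_FULL_RESET = 0,
        SKIP_HW_RESET = 1,
        RESET_FOR_DEVICE_REMOVE = 2,
};

/* Stand-ins for the kernel's set_bit()/clear_bit()/test_bit(). */
static void set_flag(int nr, unsigned long *addr)   { *addr |=  (1UL << nr); }
static void clear_flag(int nr, unsigned long *addr) { *addr &= ~(1UL << nr); }
static int  test_flag(int nr, unsigned long *addr)  { return (*addr >> nr) & 1UL; }

int main(void)
{
        unsigned long flags = 0;

        set_flag(NEED_FULL_RESET, &flags);
        clear_flag(SKIP_HW_RESET, &flags);
        printf("full reset requested: %d\n", test_flag(NEED_FULL_RESET, &flags));
        return 0;
}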
3 changes: 0 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -405,9 +405,6 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
{
ktime_t deadline = ktime_add_us(ktime_get(), 10000);

if (!(ring->adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_SOFT_RECOVERY))
return false;

if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
return false;

3 changes: 3 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -439,6 +439,9 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev,
while (cursor.remaining) {
amdgpu_res_next(&cursor, cursor.size);

if (!cursor.remaining)
break;

/* ttm_resource_ioremap only supports contiguous memory */
if (end != cursor.start)
return false;
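The added "if (!cursor.remaining) break;" above stops the walk once the final chunk has been consumed, so the contiguity check only ever compares two chunks that both exist rather than reading a stale cursor position. A standalone sketch of the corrected loop shape, using a hypothetical chunk array rather than amdgpu's resource-cursor API:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct chunk {                  /* hypothetical stand-in for one cursor step */
        uint64_t start;
        uint64_t size;
};

/* Return true only if the chunks form one contiguous range. */
static bool chunks_contiguous(const struct chunk *c, size_t n)
{
        for (size_t i = 0; i < n; i++) {
                uint64_t end = c[i].start + c[i].size;

                if (i + 1 == n)         /* mirrors the new "!cursor.remaining" break */
                        break;
                if (end != c[i + 1].start)
                        return false;
        }
        return true;
}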
6 changes: 6 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -726,6 +726,12 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
}

if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
/* VF MMIO access (except mailbox range) from CPU
* will be blocked during sriov runtime
*/
adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;

/* we have the ability to check now */
if (amdgpu_sriov_vf(adev)) {
switch (adev->asic_type) {
4 changes: 4 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -31,6 +31,7 @@
#define AMDGPU_SRIOV_CAPS_IS_VF (1 << 2) /* this GPU is a virtual function */
#define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* the whole GPU is passed through to the VM */
#define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */
#define AMDGPU_VF_MMIO_ACCESS_PROTECT (1 << 5) /* MMIO write access is not allowed in sriov runtime */

/* flags for indirect register access path supported by rlcg for sriov */
#define AMDGPU_RLCG_GC_WRITE_LEGACY (0x8 << 28)
@@ -297,6 +298,9 @@ struct amdgpu_video_codec_info;
#define amdgpu_passthrough(adev) \
((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE)

#define amdgpu_sriov_vf_mmio_access_protection(adev) \
((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT)

static inline bool is_virtual_machine(void)
{
#if defined(CONFIG_X86)
6 changes: 5 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2338,7 +2338,11 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
*/
#ifdef CONFIG_X86_64
if (amdgpu_vm_update_mode == -1) {
if (amdgpu_gmc_vram_full_visible(&adev->gmc))
/* For asic with VF MMIO access protection
* avoid using CPU for VM table updates
*/
if (amdgpu_gmc_vram_full_visible(&adev->gmc) &&
!amdgpu_sriov_vf_mmio_access_protection(adev))
adev->vm_manager.vm_update_mode =
AMDGPU_VM_USE_CPU_FOR_COMPUTE;
else
9 changes: 8 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -116,8 +116,15 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
DMA_RESV_USAGE_BOOKKEEP);
}

if (fence && !p->immediate)
if (fence && !p->immediate) {
/*
* Most hw generations now have a separate queue for page table
* updates, but when the queue is shared with userspace we need
* the extra CPU round trip to correctly flush the TLB.
*/
set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &f->flags);
swap(*fence, f);
}
dma_fence_put(f);
return 0;

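The swap()/dma_fence_put() pair above hands the newest fence back through *fence while releasing whichever fence the caller held before (dma_fence_put() on NULL is a no-op). A small userspace sketch of that ownership handoff, with a hypothetical refcounted object standing in for struct dma_fence:

#include <stdlib.h>

struct obj { int refcount; };           /* hypothetical stand-in for struct dma_fence */

static void obj_put(struct obj *o)
{
        if (o && --o->refcount == 0)    /* put on NULL is a no-op, like dma_fence_put() */
                free(o);
}

/* Give the caller the latest object; drop the reference it previously held. */
static void hand_back(struct obj **slot, struct obj *latest)
{
        struct obj *old = *slot;        /* mirrors swap(*fence, f) ... */

        *slot = latest;
        obj_put(old);                   /* ... followed by dma_fence_put(f) */
}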
3 changes: 2 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1571,7 +1571,7 @@ static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);

/* Enable trap for each kfd vmid. */
data = RREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_PER_VMID_CNTL));
data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
}
soc21_grbm_select(adev, 0, 0, 0, 0);
@@ -5076,6 +5076,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle,
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
gfx_v11_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
18 changes: 11 additions & 7 deletions drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -186,6 +186,10 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
/* Use register 17 for GART */
const unsigned eng = 17;
unsigned int i;
unsigned char hub_ip = 0;

hub_ip = (vmhub == AMDGPU_GFXHUB_0) ?
GC_HWIP : MMHUB_HWIP;

spin_lock(&adev->gmc.invalidate_lock);
/*
@@ -199,8 +203,8 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
if (use_semaphore) {
for (i = 0; i < adev->usec_timeout; i++) {
/* a read return value of 1 means semaphore acquire */
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng);
tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng, hub_ip);
if (tmp & 0x1)
break;
udelay(1);
@@ -210,12 +214,12 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
}

WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip);

/* Wait for ACK with a delay.*/
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
hub->eng_distance * eng);
tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
hub->eng_distance * eng, hub_ip);
tmp &= 1 << vmid;
if (tmp)
break;
@@ -229,8 +233,8 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
* add semaphore release after invalidation,
* write with 0 means semaphore release
*/
WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0);
WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0, hub_ip);

/* Issue additional private vm invalidation to MMHUB */
if ((vmhub != AMDGPU_GFXHUB_0) &&
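The flush sequence above polls to acquire the invalidation semaphore, issues the request, polls the per-VMID ACK bit, then writes 0 to release the semaphore; the only functional change in this hunk is that the accesses now go through the RLC-aware *_RLC_NO_KIQ helpers, matching the "Program GC registers through RLCG interface in gfx_v11/gmc_v11" commit in this pull. A compact sketch of that acquire/request/ack/release shape, with hypothetical reg_read()/reg_write() helpers over a fake register file standing in for the hub accessors:

#include <stdbool.h>
#include <stdint.h>

enum { REG_SEM, REG_REQ, REG_ACK, REG_COUNT };

static uint32_t regs[REG_COUNT];          /* fake register file for the sketch */
static uint32_t reg_read(int r)           { return regs[r]; }
static void reg_write(int r, uint32_t v)  { regs[r] = v; }

static bool flush_hub(uint32_t inv_req, unsigned int vmid, int timeout)
{
        int i;

        /* 1. Acquire: a read value of 1 means the semaphore was taken.
         * (The driver only logs an error on timeout and carries on.)
         */
        for (i = 0; i < timeout; i++)
                if (reg_read(REG_SEM) & 0x1)
                        break;            /* the driver udelay(1)s between polls */

        /* 2. Kick off the invalidation request. */
        reg_write(REG_REQ, inv_req);

        /* 3. Wait for this VMID's ACK bit. */
        for (i = 0; i < timeout; i++)
                if (reg_read(REG_ACK) & (1u << vmid))
                        break;

        /* 4. Release: writing 0 frees the semaphore. */
        reg_write(REG_SEM, 0);

        return i < timeout;
}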