Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
fc74881
drm/amdgpu: fix dropped backing store handling in amdgpu_dma_buf_move…
Dec 10, 2021
bf67014
drm/amdgpu: introduce new amdgpu_fence object to indicate the job emb…
huangrui Dec 16, 2021
19e66d5
drm/amd/pm: Fix xgmi link control on aldebaran
Dec 15, 2021
b786517
drm/amdgpu: When the VCN(1.0) block is suspended, powergating is expl…
Dec 9, 2021
5e713c6
drm/amdgpu: add support for IP discovery gc_info table v2
alexdeucher Dec 16, 2021
67f7430
drm/nouveau: wait for the exclusive fence after the shared ones v2
ChristianKoenigAMD Dec 7, 2021
4d625a9
drm/amdgpu: fix runpm documentation
alexdeucher Dec 21, 2021
0f9d36a
drm/i915: Fix possible uninitialized variable in parallel extension
mbrost05 Dec 19, 2021
d46f329
drm/i915: Increment composite fence seqno
mbrost05 Dec 14, 2021
8c45096
drm/amd/pm: skip setting gfx cgpg in the s0ix suspend-resume
prliangpub Dec 13, 2021
daf8de0
drm/amdgpu: always reset the asic in suspend (v2)
alexdeucher Nov 12, 2021
7be3be2
drm/amdgpu: put SMU into proper state on runpm suspending for BOCO ca…
Dec 17, 2021
ebae897
drm/amdgpu: no DC support for headless chips
alexdeucher Dec 23, 2021
2eb8257
drm/amd/display: fix B0 TMDS deepcolor no dislay issue
charliu-AMDENG Dec 6, 2021
d97e631
drm/amd/display: Added power down for DCN10
Dec 6, 2021
a07f8b9
drm/amd/display: Send s0i2_rdy in stream_count == 0 optimization
Dec 9, 2021
33735c1
drm/amd/display: Set optimize_pwr_state for DCN31
Dec 9, 2021
33bb639
drm/amd/display: Fix USB4 null pointer dereference in update_psp_stre…
Dec 17, 2021
ee2698c
drm/amd/display: Changed pipe split policy to allow for multi-display…
Dec 9, 2021
05097b1
Merge tag 'drm-intel-fixes-2021-12-29' of git://anongit.freedesktop.o…
airlied Dec 30, 2021
aeeb82f
Merge tag 'amd-drm-fixes-5.16-2021-12-29' of https://gitlab.freedeskt…
airlied Dec 30, 2021
ce9b333
Merge branch 'drm-misc-fixes' of ssh://git.freedesktop.org/git/drm/dr…
airlied Dec 31, 2021
4f3d93c
Merge tag 'drm-fixes-2021-12-31' of git://anongit.freedesktop.org/drm…
torvalds Dec 31, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 8 additions & 9 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -3166,6 +3166,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
{
switch (asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
case CHIP_HAINAN:
#endif
case CHIP_TOPAZ:
/* chips with no display hardware */
return false;
#if defined(CONFIG_DRM_AMD_DC)
case CHIP_TAHITI:
case CHIP_PITCAIRN:
Expand Down Expand Up @@ -4461,7 +4467,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
struct amdgpu_reset_context *reset_context)
{
int i, j, r = 0;
int i, r = 0;
struct amdgpu_job *job = NULL;
bool need_full_reset =
test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
Expand All @@ -4483,15 +4489,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,

/*clear job fence from fence drv to avoid force_completion
*leave NULL and vm flush fence in fence drv */
for (j = 0; j <= ring->fence_drv.num_fences_mask; j++) {
struct dma_fence *old, **ptr;
amdgpu_fence_driver_clear_job_fences(ring);

ptr = &ring->fence_drv.fences[j];
old = rcu_dereference_protected(*ptr, 1);
if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &old->flags)) {
RCU_INIT_POINTER(*ptr, NULL);
}
}
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion(ring);
}
Expand Down
76 changes: 54 additions & 22 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
Original file line number Diff line number Diff line change
Expand Up @@ -526,39 +526,71 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
}
}

/*
 * Overlay of the supported layouts of the GC info table.
 * amdgpu_discovery_get_gfx_info() reads header.version_major through the
 * v1 member and then decodes the table through either v1 or v2.
 */
union gc_info {
struct gc_info_v1_0 v1;
struct gc_info_v2_0 v2;
};

int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
{
struct binary_header *bhdr;
struct gc_info_v1_0 *gc_info;
union gc_info *gc_info;

if (!adev->mman.discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}

bhdr = (struct binary_header *)adev->mman.discovery_bin;
gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin +
gc_info = (union gc_info *)(adev->mman.discovery_bin +
le16_to_cpu(bhdr->table_list[GC].offset));

adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se);
adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) +
le32_to_cpu(gc_info->gc_num_wgp1_per_sa));
adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se);
adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se);
adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c);
adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs);
adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds);
adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth);
adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth);
adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer);
adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size);
adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd);
adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu);
adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size);
adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) /
le32_to_cpu(gc_info->gc_num_sa_per_se);
adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc);

switch (gc_info->v1.header.version_major) {
case 1:
adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se);
adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) +
le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa));
adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se);
adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c);
adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs);
adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds);
adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth);
adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth);
adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer);
adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size);
adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd);
adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu);
adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size);
adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) /
le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc);
break;
case 2:
adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh);
adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se);
adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs);
adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs);
adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds);
adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth);
adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth);
adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer);
adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size);
adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd);
adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu);
adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size);
adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
break;
default:
dev_err(adev->dev,
"Unhandled GC info table %d.%d\n",
gc_info->v1.header.version_major,
gc_info->v1.header.version_minor);
return -EINVAL;
}
return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
struct amdgpu_vm_bo_base *bo_base;
int r;

if (bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
return;

r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
Expand Down
27 changes: 23 additions & 4 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -328,10 +328,11 @@ module_param_named(aspm, amdgpu_aspm, int, 0444);

/**
* DOC: runpm (int)
* Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down
* the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality.
* Override for runtime power management control for dGPUs. The amdgpu driver can dynamically power down
* the dGPUs when they are idle if supported. The default is -1 (auto enable).
* Setting the value to 0 disables this functionality.
*/
MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = PX only default)");
MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto)");
module_param_named(runpm, amdgpu_runtime_pm, int, 0444);

/**
Expand Down Expand Up @@ -2153,7 +2154,10 @@ static int amdgpu_pmops_suspend(struct device *dev)
adev->in_s3 = true;
r = amdgpu_device_suspend(drm_dev, true);
adev->in_s3 = false;

if (r)
return r;
if (!adev->in_s0ix)
r = amdgpu_asic_reset(adev);
return r;
}

Expand Down Expand Up @@ -2234,12 +2238,27 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
if (amdgpu_device_supports_px(drm_dev))
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

/*
* By setting mp1_state as PP_MP1_STATE_UNLOAD, MP1 will do some
* proper cleanups and put itself into a state ready for PNP. That
* can address some random resume failures observed on BOCO capable
* platforms.
* TODO: this may also be needed for PX capable platforms.
*/
if (amdgpu_device_supports_boco(drm_dev))
adev->mp1_state = PP_MP1_STATE_UNLOAD;

ret = amdgpu_device_suspend(drm_dev, false);
if (ret) {
adev->in_runpm = false;
if (amdgpu_device_supports_boco(drm_dev))
adev->mp1_state = PP_MP1_STATE_NONE;
return ret;
}

if (amdgpu_device_supports_boco(drm_dev))
adev->mp1_state = PP_MP1_STATE_NONE;

if (amdgpu_device_supports_px(drm_dev)) {
/* Only need to handle PCI state in the driver for ATPX
* PCI core handles it for _PR3.
Expand Down
126 changes: 87 additions & 39 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,13 @@ void amdgpu_fence_slab_fini(void)
* Cast helper
*/
static const struct dma_fence_ops amdgpu_fence_ops;
static const struct dma_fence_ops amdgpu_job_fence_ops;
static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
{
struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);

if (__f->base.ops == &amdgpu_fence_ops)
if (__f->base.ops == &amdgpu_fence_ops ||
__f->base.ops == &amdgpu_job_fence_ops)
return __f;

return NULL;
Expand Down Expand Up @@ -158,19 +160,18 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
}

seq = ++ring->fence_drv.sync_seq;
if (job != NULL && job->job_run_counter) {
if (job && job->job_run_counter) {
/* reinit seq for resubmitted jobs */
fence->seqno = seq;
} else {
dma_fence_init(fence, &amdgpu_fence_ops,
&ring->fence_drv.lock,
adev->fence_context + ring->idx,
seq);
}

if (job != NULL) {
/* mark this fence has a parent job */
set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &fence->flags);
if (job)
dma_fence_init(fence, &amdgpu_job_fence_ops,
&ring->fence_drv.lock,
adev->fence_context + ring->idx, seq);
else
dma_fence_init(fence, &amdgpu_fence_ops,
&ring->fence_drv.lock,
adev->fence_context + ring->idx, seq);
}

amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
Expand Down Expand Up @@ -620,6 +621,25 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
}
}

/**
 * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring
 *
 * @ring: ring whose fence driver slots are scanned
 *
 * Visit every slot of ring->fence_drv.fences (indices 0 through
 * num_fences_mask inclusive) and reset to NULL each slot that currently
 * holds a fence whose ops is amdgpu_job_fence_ops. Empty slots and fences
 * using any other ops are left in place.
 */
void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
{
	int idx;

	for (idx = 0; idx <= ring->fence_drv.num_fences_mask; idx++) {
		struct dma_fence **slot = &ring->fence_drv.fences[idx];
		struct dma_fence *fence = rcu_dereference_protected(*slot, 1);

		/* nothing stored here, or not a job embedded fence: keep it */
		if (!fence || fence->ops != &amdgpu_job_fence_ops)
			continue;

		RCU_INIT_POINTER(*slot, NULL);
	}
}

/**
* amdgpu_fence_driver_force_completion - force signal latest fence of ring
*
Expand All @@ -643,16 +663,14 @@ static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence)

static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
{
struct amdgpu_ring *ring;
return (const char *)to_amdgpu_fence(f)->ring->name;
}

if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) {
struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
{
struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);

ring = to_amdgpu_ring(job->base.sched);
} else {
ring = to_amdgpu_fence(f)->ring;
}
return (const char *)ring->name;
return (const char *)to_amdgpu_ring(job->base.sched)->name;
}

/**
Expand All @@ -665,18 +683,25 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
*/
static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
{
struct amdgpu_ring *ring;
if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer))
amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring);

if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) {
struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
return true;
}

ring = to_amdgpu_ring(job->base.sched);
} else {
ring = to_amdgpu_fence(f)->ring;
}
/**
* amdgpu_job_fence_enable_signaling - enable signalling on job fence
* @f: fence
*
* This is similar to amdgpu_fence_enable_signaling() above, but it
* only handles the job embedded fence.
*/
static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
{
struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);

if (!timer_pending(&ring->fence_drv.fallback_timer))
amdgpu_fence_schedule_fallback(ring);
if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));

return true;
}
Expand All @@ -692,19 +717,23 @@ static void amdgpu_fence_free(struct rcu_head *rcu)
{
struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);

if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) {
/* free job if fence has a parent job */
struct amdgpu_job *job;

job = container_of(f, struct amdgpu_job, hw_fence);
kfree(job);
} else {
/* free fence_slab if it's separated fence*/
struct amdgpu_fence *fence;
kmem_cache_free(amdgpu_fence_slab, to_amdgpu_fence(f));
}

fence = to_amdgpu_fence(f);
kmem_cache_free(amdgpu_fence_slab, fence);
}
/**
 * amdgpu_job_fence_free - free the job that embeds a fence
 *
 * @rcu: RCU callback head, embedded in the fence's struct dma_fence
 *
 * RCU callback queued by amdgpu_job_fence_release(). Recovers the
 * struct amdgpu_job that contains the fence as its hw_fence member and
 * frees that job with kfree().
 */
static void amdgpu_job_fence_free(struct rcu_head *rcu)
{
	struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
	struct amdgpu_job *job = container_of(fence, struct amdgpu_job, hw_fence);

	/* the fence lives inside the job, so freeing the job frees both */
	kfree(job);
}

/**
Expand All @@ -720,13 +749,32 @@ static void amdgpu_fence_release(struct dma_fence *f)
call_rcu(&f->rcu, amdgpu_fence_free);
}

/**
 * amdgpu_job_fence_release - release callback for job embedded fences
 *
 * @f: fence
 *
 * Counterpart of amdgpu_fence_release() for fences embedded in a job:
 * the actual free is deferred to amdgpu_job_fence_free() via call_rcu().
 */
static void amdgpu_job_fence_release(struct dma_fence *f)
{
	struct rcu_head *head = &f->rcu;

	call_rcu(head, amdgpu_job_fence_free);
}

/*
 * dma_fence callbacks for fences that are not embedded in a job;
 * amdgpu_fence_emit() initializes a fence with these ops when it is
 * called without a job.
 */
static const struct dma_fence_ops amdgpu_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_fence_get_timeline_name,
.enable_signaling = amdgpu_fence_enable_signaling,
.release = amdgpu_fence_release,
};

/*
 * dma_fence callbacks for fences embedded in a struct amdgpu_job (its
 * hw_fence member); amdgpu_fence_emit() initializes a fence with these
 * ops when it is called with a job. The driver name callback is shared
 * with amdgpu_fence_ops; the other callbacks are the job-specific
 * variants.
 */
static const struct dma_fence_ops amdgpu_job_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_job_fence_get_timeline_name,
.enable_signaling = amdgpu_job_fence_enable_signaling,
.release = amdgpu_job_fence_release,
};

/*
* Fence debugfs
Expand Down
Loading