-
-
Notifications
You must be signed in to change notification settings - Fork 1.5k
/
GSRendererHW.cpp
2095 lines (1696 loc) · 63.1 KB
/
GSRendererHW.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "stdafx.h"
#include "GSRendererHW.h"
GSRendererHW::GSRendererHW(GSTextureCache* tc)
: m_width(default_rt_size.x)
, m_height(default_rt_size.y)
, m_custom_width(1024)
, m_custom_height(1024)
, m_reset(false)
, m_upscale_multiplier(1)
, m_disable_ts_half_bottom(false)
, m_tc(tc)
, m_src(nullptr)
, m_userhacks_tcoffset(false)
, m_userhacks_tcoffset_x(0)
, m_userhacks_tcoffset_y(0)
, m_channel_shuffle(false)
, m_lod(GSVector2i(0,0))
{
m_mipmap = theApp.GetConfigI("mipmap_hw");
m_upscale_multiplier = theApp.GetConfigI("upscale_multiplier");
m_large_framebuffer = theApp.GetConfigB("large_framebuffer");
m_accurate_date = theApp.GetConfigI("accurate_date");
m_disable_ts_half_bottom = theApp.GetConfigB("disable_ts_half_bottom");
if (theApp.GetConfigB("UserHacks")) {
m_userhacks_enabled_gs_mem_clear = !theApp.GetConfigB("UserHacks_Disable_Safe_Features");
m_userHacks_enabled_unscale_ptln = !theApp.GetConfigB("UserHacks_Disable_Safe_Features");
m_userhacks_align_sprite_X = theApp.GetConfigB("UserHacks_align_sprite_X");
m_userhacks_round_sprite_offset = theApp.GetConfigI("UserHacks_round_sprite_offset");
m_userHacks_HPO = theApp.GetConfigI("UserHacks_HalfPixelOffset");
m_userHacks_merge_sprite = theApp.GetConfigB("UserHacks_merge_pp_sprite");
m_userhacks_tcoffset_x = theApp.GetConfigI("UserHacks_TCOffsetX") / -1000.0f;
m_userhacks_tcoffset_y = theApp.GetConfigI("UserHacks_TCOffsetY") / -1000.0f;
m_userhacks_tcoffset = m_userhacks_tcoffset_x < 0.0f || m_userhacks_tcoffset_y < 0.0f;
} else {
m_userhacks_enabled_gs_mem_clear = true;
m_userHacks_enabled_unscale_ptln = true;
m_userhacks_align_sprite_X = false;
m_userhacks_round_sprite_offset = 0;
m_userHacks_HPO = 0;
m_userHacks_merge_sprite = false;
}
if (!m_upscale_multiplier) { //Custom Resolution
m_custom_width = m_width = theApp.GetConfigI("resx");
m_custom_height = m_height = theApp.GetConfigI("resy");
}
if (m_upscale_multiplier == 1) { // hacks are only needed for upscaling issues.
m_userhacks_round_sprite_offset = 0;
m_userhacks_align_sprite_X = false;
m_userHacks_merge_sprite = false;
}
m_dump_root = root_hw;
}
void GSRendererHW::SetScaling()
{
if (!m_upscale_multiplier)
{
CustomResolutionScaling();
return;
}
GSVector2i crtc_size(GetDisplayRect().width(), GetDisplayRect().height());
// Details of (potential) perf impact of a big framebuffer
// 1/ extra memory
// 2/ texture cache framebuffer rescaling/copy
// 3/ upload of framebuffer (preload hack)
// 4/ framebuffer clear (color/depth/stencil)
// 5/ read back of the frambuffer
//
// With the solution
// 1/ Nothing to do.Except the texture cache bug (channel shuffle effect)
// most of the market is 1GB of VRAM (and soon 2GB)
// 2/ limit rescaling/copy to the valid data of the framebuffer
// 3/ ??? no solution so far
// 4a/ stencil can be limited to valid data.
// 4b/ is it useful to clear color? depth? (in any case, it ought to be few operation)
// 5/ limit the read to the valid data
// Framebuffer width is always a multiple of 64 so at certain cases it can't cover some weird width values.
// 480P , 576P use width as 720 which is not referencable by FBW * 64. so it produces 704 ( the closest value multiple by 64).
// In such cases, let's just use the CRTC width.
int fb_width = std::max({ (int)m_context->FRAME.FBW * 64, crtc_size.x , 512 });
// GS doesn't have a specific register for the FrameBuffer height. so we get the height
// from physical units of the display rectangle in case the game uses a heigher value of height.
//
// Gregory: the framebuffer must have enough room to draw
// * at least 2 frames such as FMV (see OI_BlitFMV)
// * high resolution game such as snowblind engine game
//
// Autodetection isn't a good idea because it will create flickering
// If memory consumption is an issue, there are 2 possibilities
// * 1/ Avoid to create hundreds of RT
// * 2/ Use sparse texture (requires recent HW)
//
// Avoid to alternate between 640x1280 and 1280x1024 on snow blind engine game
// int fb_height = (fb_width < 1024) ? 1280 : 1024;
//
// Until performance issue is properly fixed, let's keep an option to reduce the framebuffer size.
int fb_height = m_large_framebuffer ? 1280 :
(fb_width < 1024) ? std::max(512, crtc_size.y) : 1024;
int upscaled_fb_w = fb_width * m_upscale_multiplier;
int upscaled_fb_h = fb_height * m_upscale_multiplier;
bool good_rt_size = m_width >= upscaled_fb_w && m_height >= upscaled_fb_h;
// No need to resize for native/custom resolutions as default size will be enough for native and we manually get RT Buffer size for custom.
// don't resize until the display rectangle and register states are stabilized.
if ( m_upscale_multiplier <= 1 || good_rt_size)
return;
m_tc->RemovePartial();
m_width = upscaled_fb_w;
m_height = upscaled_fb_h;
printf("Frame buffer size set to %dx%d (%dx%d)\n", fb_width, fb_height , m_width, m_height);
}
void GSRendererHW::CustomResolutionScaling()
{
const int crtc_width = GetDisplayRect().width();
const int crtc_height = GetDisplayRect().height();
GSVector2 scaling_ratio;
scaling_ratio.x = std::ceil(static_cast<float>(m_custom_width) / crtc_width);
scaling_ratio.y = std::ceil(static_cast<float>(m_custom_height) / crtc_height);
// Avoid using a scissor value which is too high, developers can even leave the scissor to max (2047)
// at some cases when they don't want to limit the rendering size. Our assumption is that developers
// set the scissor to the actual data in the buffer. Let's use the scissoring value only at such cases
const int scissor_width = std::min(640, static_cast<int>(m_context->SCISSOR.SCAX1 - m_context->SCISSOR.SCAX0) + 1);
const int scissor_height = std::min(640, static_cast<int>(m_context->SCISSOR.SCAY1 - m_context->SCISSOR.SCAY0) + 1);
GSVector2i scissored_buffer_size;
//TODO: SCAX is not used yet, not sure if it's worth considering the horizontal scissor? dunno where it helps yet.
// the ICO testcase is there to show that vertical scissor is helpful on the double scan mode games.
scissored_buffer_size.x = std::max(crtc_width, scissor_width);
scissored_buffer_size.y = std::max(crtc_height, scissor_height);
// We also consider for potential scissor sizes which are around
// the size of the actual image data stored. (Helps ICO to properly scale to right size by help of the
// scissoring values) Display rectangle has a height of 256 but scissor has a height of 512 which seems to
// be the real buffer size. Not sure if the width one is needed, need to check it on some random data before enabling it.
// int framebuffer_width = static_cast<int>(std::round(scissored_buffer_size.x * scaling_ratio.x));
int framebuffer_height = static_cast<int>(std::round(scissored_buffer_size.y * scaling_ratio.y));
if (m_width >= m_custom_width && m_height >= framebuffer_height)
return;
m_tc->RemovePartial();
m_width = std::max(m_width, default_rt_size.x);
m_height = std::max(framebuffer_height, default_rt_size.y);
printf("Frame buffer size set to %dx%d (%dx%d)\n", scissored_buffer_size.x, scissored_buffer_size.y, m_width, m_height);
}
GSRendererHW::~GSRendererHW()
{
delete m_tc;
}
void GSRendererHW::SetGameCRC(uint32 crc, int options)
{
GSRenderer::SetGameCRC(crc, options);
m_hacks.SetGameCRC(m_game);
// Code for Automatic Mipmapping. Relies on game CRCs.
if (theApp.GetConfigT<HWMipmapLevel>("mipmap_hw") == HWMipmapLevel::Automatic)
{
switch (CRC::Lookup(crc).title)
{
case CRC::AceCombatZero:
case CRC::AceCombat4:
case CRC::AceCombat5:
case CRC::ApeEscape2:
case CRC::Barnyard:
case CRC::BrianLaraInternationalCricket:
case CRC::DarkCloud:
case CRC::DestroyAllHumans:
case CRC::DestroyAllHumans2:
case CRC::FIFA03:
case CRC::FIFA04:
case CRC::FIFA05:
case CRC::HarryPotterATCOS:
case CRC::HarryPotterATHBP:
case CRC::HarryPotterATPOA:
case CRC::HarryPotterOOTP:
// Disable Automatic mipmapping for Jak games for now, it seems to cause a hard crash.
// Issue https://github.com/PCSX2/pcsx2/issues/2916
// case CRC::Jak1:
// case CRC::Jak3:
case CRC::LegacyOfKainDefiance:
case CRC::NicktoonsUnite:
case CRC::RatchetAndClank:
case CRC::RatchetAndClank2:
case CRC::RatchetAndClank3:
case CRC::RatchetAndClank4:
case CRC::RatchetAndClank5:
case CRC::RickyPontingInternationalCricket:
case CRC::Quake3Revolution:
case CRC::Shox:
case CRC::SoulReaver2:
case CRC::TheIncredibleHulkUD:
case CRC::TombRaiderAnniversary:
case CRC::TribesAerialAssault:
case CRC::Whiplash:
m_mipmap = static_cast<int>(HWMipmapLevel::Basic);
break;
default:
m_mipmap = static_cast<int>(HWMipmapLevel::Off);
break;
}
}
}
bool GSRendererHW::CanUpscale()
{
if(m_hacks.m_cu && !(this->*m_hacks.m_cu)())
{
return false;
}
return m_upscale_multiplier!=1 && m_regs->PMODE.EN != 0; // upscale ratio depends on the display size, with no output it may not be set correctly (ps2 logo to game transition)
}
int GSRendererHW::GetUpscaleMultiplier()
{
return m_upscale_multiplier;
}
GSVector2i GSRendererHW::GetCustomResolution()
{
return GSVector2i(m_custom_width, m_custom_height);
}
void GSRendererHW::Reset()
{
// TODO: GSreset can come from the main thread too => crash
// m_tc->RemoveAll();
m_reset = true;
GSRenderer::Reset();
}
void GSRendererHW::VSync(int field)
{
//Check if the frame buffer width or display width has changed
SetScaling();
if(m_reset)
{
m_tc->RemoveAll();
m_reset = false;
}
GSRenderer::VSync(field);
m_tc->IncAge();
m_tc->PrintMemoryUsage();
m_dev->PrintMemoryUsage();
m_skip = 0;
m_skip_offset = 0;
}
void GSRendererHW::ResetDevice()
{
m_tc->RemoveAll();
GSRenderer::ResetDevice();
}
GSTexture* GSRendererHW::GetOutput(int i, int& y_offset)
{
const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB;
GIFRegTEX0 TEX0;
TEX0.TBP0 = DISPFB.Block();
TEX0.TBW = DISPFB.FBW;
TEX0.PSM = DISPFB.PSM;
// TRACE(_T("[%d] GetOutput %d %05x (%d)\n"), (int)m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM);
GSTexture* t = NULL;
if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GetFramebufferHeight()))
{
t = rt->m_texture;
int delta = TEX0.TBP0 - rt->m_TEX0.TBP0;
if (delta > 0 && DISPFB.FBW != 0) {
int pages = delta >> 5u;
int y_pages = pages / DISPFB.FBW;
y_offset = y_pages * GSLocalMemory::m_psm[DISPFB.PSM].pgs.y;
GL_CACHE("Frame y offset %d pixels, unit %d", y_offset, i);
}
#ifdef ENABLE_OGL_DEBUG
if(s_dump)
{
if(s_savef && s_n >= s_saven)
{
t->Save(m_dump_root + format("%05d_f%lld_fr%d_%05x_%s.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, psm_str(TEX0.PSM)));
}
}
#endif
}
return t;
}
GSTexture* GSRendererHW::GetFeedbackOutput()
{
GIFRegTEX0 TEX0;
TEX0.TBP0 = m_regs->EXTBUF.EXBP;
TEX0.TBW = m_regs->EXTBUF.EXBW;
TEX0.PSM = m_regs->DISP[m_regs->EXTBUF.FBIN & 1].DISPFB.PSM;
GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, /*GetFrameRect(i).bottom*/0);
GSTexture* t = rt->m_texture;
#ifdef ENABLE_OGL_DEBUG
if(s_dump && s_savef && s_n >= s_saven)
t->Save(m_dump_root + format("%05d_f%lld_fr%d_%05x_%s.bmp", s_n, m_perfmon.GetFrame(), 3, (int)TEX0.TBP0, psm_str(TEX0.PSM)));
#endif
return t;
}
void GSRendererHW::Lines2Sprites()
{
ASSERT(m_vt.m_primclass == GS_SPRITE_CLASS);
// each sprite converted to quad needs twice the space
while (m_vertex.tail * 2 > m_vertex.maxcount)
{
GrowVertexBuffer();
}
// assume vertices are tightly packed and sequentially indexed (it should be the case)
if (m_vertex.next >= 2)
{
size_t count = m_vertex.next;
int i = (int)count * 2 - 4;
GSVertex* s = &m_vertex.buff[count - 2];
GSVertex* q = &m_vertex.buff[count * 2 - 4];
uint32* RESTRICT index = &m_index.buff[count * 3 - 6];
for (; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
{
GSVertex v0 = s[0];
GSVertex v1 = s[1];
v0.RGBAQ = v1.RGBAQ;
v0.XYZ.Z = v1.XYZ.Z;
v0.FOG = v1.FOG;
if (PRIM->TME && !PRIM->FST) {
GSVector4 st0 = GSVector4::loadl(&v0.ST.u64);
GSVector4 st1 = GSVector4::loadl(&v1.ST.u64);
GSVector4 Q = GSVector4(v1.RGBAQ.Q, v1.RGBAQ.Q, v1.RGBAQ.Q, v1.RGBAQ.Q);
GSVector4 st = st0.upld(st1) / Q;
GSVector4::storel(&v0.ST.u64, st);
GSVector4::storeh(&v1.ST.u64, st);
v0.RGBAQ.Q = 1.0f;
v1.RGBAQ.Q = 1.0f;
}
q[0] = v0;
q[3] = v1;
// swap x, s, u
uint16 x = v0.XYZ.X;
v0.XYZ.X = v1.XYZ.X;
v1.XYZ.X = x;
float s = v0.ST.S;
v0.ST.S = v1.ST.S;
v1.ST.S = s;
uint16 u = v0.U;
v0.U = v1.U;
v1.U = u;
q[1] = v0;
q[2] = v1;
index[0] = i + 0;
index[1] = i + 1;
index[2] = i + 2;
index[3] = i + 1;
index[4] = i + 2;
index[5] = i + 3;
}
m_vertex.head = m_vertex.tail = m_vertex.next = count * 2;
m_index.tail = count * 3;
}
}
// Fix the vertex position/tex_coordinate from 16 bits color to 32 bits color
void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
{
size_t count = m_vertex.next;
GSVertex* v = &m_vertex.buff[0];
const GIFRegXYOFFSET& o = m_context->XYOFFSET;
// vertex position is 8 to 16 pixels, therefore it is the 16-31 bits of the colors
int pos = (v[0].XYZ.X - o.OFX) & 0xFF;
write_ba = (pos > 112 && pos < 136);
// Read texture is 8 to 16 pixels (same as above)
float tw = (float)(1u << m_context->TEX0.TW);
int tex_pos = (PRIM->FST) ? v[0].U : (int)(tw * v[0].ST.S);
tex_pos &= 0xFF;
read_ba = (tex_pos > 112 && tex_pos < 144);
// Here's the idea
// TS effect is 16 bits but we emulate it on a 32 bits format
// Normally this means we need to divide size by 2.
//
// Some games do two TS effects on each half of the buffer.
// This makes a mess for us in the TC because we end up with two targets
// when we only want one, thus half screen bug.
//
// 32bits emulation means we can do the effect once but double the size.
// Test cases: Crash Twinsantiy and DBZ BT3
int height_delta = m_src->m_valid_rect.height() - m_r.height();
// Test Case: NFS: HP2 splits the effect h:256 and h:192 so 64
bool half_bottom = abs(height_delta) <= 64 && !m_disable_ts_half_bottom;
if (PRIM->FST) {
GL_INS("First vertex is P: %d => %d T: %d => %d", v[0].XYZ.X, v[1].XYZ.X, v[0].U, v[1].U);
for(size_t i = 0; i < count; i += 2) {
if (write_ba)
v[i].XYZ.X -= 128u;
else
v[i+1].XYZ.X += 128u;
if (read_ba)
v[i].U -= 128u;
else
v[i+1].U += 128u;
if (!half_bottom){
// Height is too big (2x).
int tex_offset = v[i].V & 0xF;
GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset);
GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i + 1].XYZ.Y, v[i + 1].V);
tmp = GSVector4i(tmp - offset).srl32(1) + offset;
v[i].XYZ.Y = (uint16)tmp.x;
v[i].V = (uint16)tmp.y;
v[i + 1].XYZ.Y = (uint16)tmp.z;
v[i + 1].V = (uint16)tmp.w;
}
}
} else {
const float offset_8pix = 8.0f / tw;
GL_INS("First vertex is P: %d => %d T: %f => %f (offset %f)", v[0].XYZ.X, v[1].XYZ.X, v[0].ST.S, v[1].ST.S, offset_8pix);
for(size_t i = 0; i < count; i += 2) {
if (write_ba)
v[i].XYZ.X -= 128u;
else
v[i+1].XYZ.X += 128u;
if (read_ba)
v[i].ST.S -= offset_8pix;
else
v[i+1].ST.S += offset_8pix;
if (!half_bottom) {
// Height is too big (2x).
GSVector4i offset(o.OFY, o.OFY);
GSVector4i tmp(v[i].XYZ.Y, v[i + 1].XYZ.Y);
tmp = GSVector4i(tmp - offset).srl32(1) + offset;
//fprintf(stderr, "Before %d, After %d\n", v[i+1].XYZ.Y, tmp.y);
v[i].XYZ.Y = (uint16)tmp.x;
v[i].ST.T /= 2.0f;
v[i + 1].XYZ.Y = (uint16)tmp.y;
v[i + 1].ST.T /= 2.0f;
}
}
}
// Update vertex trace too. Avoid issue to compute bounding box
if (write_ba)
m_vt.m_min.p.x -= 8.0f;
else
m_vt.m_max.p.x += 8.0f;
if (!half_bottom) {
float delta_Y = m_vt.m_max.p.y - m_vt.m_min.p.y;
m_vt.m_max.p.y -= delta_Y / 2.0f;
}
if (read_ba)
m_vt.m_min.t.x -= 8.0f;
else
m_vt.m_max.t.x += 8.0f;
if (!half_bottom) {
float delta_T = m_vt.m_max.t.y - m_vt.m_min.t.y;
m_vt.m_max.t.y -= delta_T / 2.0f;
}
}
GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Source* tex)
{
if (m_userHacks_HPO <= 1 || GetUpscaleMultiplier() == 1) return GSVector4(0.0f);
GSVertex* v = &m_vertex.buff[0];
const GSVector2& scale = tex->m_texture->GetScale();
bool linear = m_vt.IsRealLinear();
int t_position = v[0].U;
GSVector4 half_offset(0.0f);
// FIXME Let's start with something wrong same mess on X and Y
// FIXME Maybe it will be enough to check linear
if (PRIM->FST) {
if (m_userHacks_HPO == 3) {
if (!linear && t_position == 8) {
half_offset.x = 8;
half_offset.y = 8;
} else if (linear && t_position == 16) {
half_offset.x = 16;
half_offset.y = 16;
} else if (m_vt.m_min.p.x == -0.5f) {
half_offset.x = 8;
half_offset.y = 8;
}
} else {
if (!linear && t_position == 8) {
half_offset.x = 8 - 8 / scale.x;
half_offset.y = 8 - 8 / scale.y;
} else if (linear && t_position == 16) {
half_offset.x = 16 - 16 / scale.x;
half_offset.y = 16 - 16 / scale.y;
} else if (m_vt.m_min.p.x == -0.5f) {
half_offset.x = 8;
half_offset.y = 8;
}
}
GL_INS("offset detected %f,%f t_pos %d (linear %d, scale %f)",
half_offset.x, half_offset.y, t_position, linear, scale.x);
} else if (m_vt.m_eq.q) {
float tw = (float)(1 << m_context->TEX0.TW);
float th = (float)(1 << m_context->TEX0.TH);
float q = v[0].RGBAQ.Q;
// Tales of Abyss
half_offset.x = 0.5f * q / tw;
half_offset.y = 0.5f * q / th;
GL_INS("ST offset detected %f,%f (linear %d, scale %f)",
half_offset.x, half_offset.y, linear, scale.x);
}
return half_offset;
}
GSVector4i GSRendererHW::ComputeBoundingBox(const GSVector2& rtscale, const GSVector2i& rtsize)
{
GSVector4 scale = GSVector4(rtscale.x, rtscale.y);
GSVector4 offset = GSVector4(-1.0f, 1.0f); // Round value
GSVector4 box = m_vt.m_min.p.xyxy(m_vt.m_max.p) + offset.xxyy();
return GSVector4i(box * scale.xyxy()).rintersect(GSVector4i(0, 0, rtsize.x, rtsize.y));
}
void GSRendererHW::MergeSprite(GSTextureCache::Source* tex)
{
// Upscaling hack to avoid various line/grid issues
if (m_userHacks_merge_sprite && tex && tex->m_target && (m_vt.m_primclass == GS_SPRITE_CLASS)) {
if (PRIM->FST && GSLocalMemory::m_psm[tex->m_TEX0.PSM].fmt < 2 && ((m_vt.m_eq.value & 0xCFFFF) == 0xCFFFF)) {
// Ideally the hack ought to be enabled in a true paving mode only. I don't know how to do it accurately
// neither in a fast way. So instead let's just take the hypothesis that all sprites must have the same
// size.
// Tested on Tekken 5.
GSVertex* v = &m_vertex.buff[0];
bool is_paving = true;
// SSE optimization: shuffle m[1] to have (4*32 bits) X, Y, U, V
int first_dpX = v[1].XYZ.X - v[0].XYZ.X;
int first_dpU = v[1].U - v[0].U;
for (size_t i = 0; i < m_vertex.next; i += 2) {
int dpX = v[i + 1].XYZ.X - v[i].XYZ.X;
int dpU = v[i + 1].U - v[i].U;
if (dpX != first_dpX || dpU != first_dpU) {
is_paving = false;
break;
}
}
#if 0
GSVector4 delta_p = m_vt.m_max.p - m_vt.m_min.p;
GSVector4 delta_t = m_vt.m_max.t - m_vt.m_min.t;
bool is_blit = PrimitiveOverlap() == PRIM_OVERLAP_NO;
GL_INS("PP SAMPLER: Dp %f %f Dt %f %f. Is blit %d, is paving %d, count %d", delta_p.x, delta_p.y, delta_t.x, delta_t.y, is_blit, is_paving, m_vertex.tail);
#endif
if (is_paving) {
// Replace all sprite with a single fullscreen sprite.
GSVertex* s = &m_vertex.buff[0];
s[0].XYZ.X = static_cast<uint16>((16.0f * m_vt.m_min.p.x) + m_context->XYOFFSET.OFX);
s[1].XYZ.X = static_cast<uint16>((16.0f * m_vt.m_max.p.x) + m_context->XYOFFSET.OFX);
s[0].XYZ.Y = static_cast<uint16>((16.0f * m_vt.m_min.p.y) + m_context->XYOFFSET.OFY);
s[1].XYZ.Y = static_cast<uint16>((16.0f * m_vt.m_max.p.y) + m_context->XYOFFSET.OFY);
s[0].U = static_cast<uint16>(16.0f * m_vt.m_min.t.x);
s[0].V = static_cast<uint16>(16.0f * m_vt.m_min.t.y);
s[1].U = static_cast<uint16>(16.0f * m_vt.m_max.t.x);
s[1].V = static_cast<uint16>(16.0f * m_vt.m_max.t.y);
m_vertex.head = m_vertex.tail = m_vertex.next = 2;
m_index.tail = 2;
}
}
}
}
void GSRendererHW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
// printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP, (int)BITBLTBUF.DPSM);
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r);
}
void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
{
// printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM);
if(clut) return; // FIXME
m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
}
uint16 GSRendererHW::Interpolate_UV(float alpha, int t0, int t1)
{
float t = (1.0f - alpha) * t0 + alpha * t1;
return (uint16)t & ~0xF; // cheap rounding
}
float GSRendererHW::alpha0(int L, int X0, int X1)
{
int x = (X0 + 15) & ~0xF; // Round up
return float(x - X0) / (float)L;
}
float GSRendererHW::alpha1(int L, int X0, int X1)
{
int x = (X1 - 1) & ~0xF; // Round down. Note -1 because right pixel isn't included in primitive so 0x100 must return 0.
return float(x - X0) / (float)L;
}
template <bool linear>
void GSRendererHW::RoundSpriteOffset()
{
//#define DEBUG_U
//#define DEBUG_V
#if defined(DEBUG_V) || defined(DEBUG_U)
bool debug = linear;
#endif
size_t count = m_vertex.next;
GSVertex* v = &m_vertex.buff[0];
for(size_t i = 0; i < count; i += 2) {
// Performance note: if it had any impact on perf, someone would port it to SSE (AKA GSVector)
// Compute the coordinate of first and last texels (in native with a linear filtering)
int ox = m_context->XYOFFSET.OFX;
int X0 = v[i].XYZ.X - ox;
int X1 = v[i+1].XYZ.X - ox;
int Lx = (v[i+1].XYZ.X - v[i].XYZ.X);
float ax0 = alpha0(Lx, X0, X1);
float ax1 = alpha1(Lx, X0, X1);
uint16 tx0 = Interpolate_UV(ax0, v[i].U, v[i+1].U);
uint16 tx1 = Interpolate_UV(ax1, v[i].U, v[i+1].U);
#ifdef DEBUG_U
if (debug) {
fprintf(stderr, "u0:%d and u1:%d\n", v[i].U, v[i+1].U);
fprintf(stderr, "a0:%f and a1:%f\n", ax0, ax1);
fprintf(stderr, "t0:%d and t1:%d\n", tx0, tx1);
}
#endif
int oy = m_context->XYOFFSET.OFY;
int Y0 = v[i].XYZ.Y - oy;
int Y1 = v[i+1].XYZ.Y - oy;
int Ly = (v[i+1].XYZ.Y - v[i].XYZ.Y);
float ay0 = alpha0(Ly, Y0, Y1);
float ay1 = alpha1(Ly, Y0, Y1);
uint16 ty0 = Interpolate_UV(ay0, v[i].V, v[i+1].V);
uint16 ty1 = Interpolate_UV(ay1, v[i].V, v[i+1].V);
#ifdef DEBUG_V
if (debug) {
fprintf(stderr, "v0:%d and v1:%d\n", v[i].V, v[i+1].V);
fprintf(stderr, "a0:%f and a1:%f\n", ay0, ay1);
fprintf(stderr, "t0:%d and t1:%d\n", ty0, ty1);
}
#endif
#ifdef DEBUG_U
if (debug)
fprintf(stderr, "GREP_BEFORE %d => %d\n", v[i].U, v[i+1].U);
#endif
#ifdef DEBUG_V
if (debug)
fprintf(stderr, "GREP_BEFORE %d => %d\n", v[i].V, v[i+1].V);
#endif
#if 1
// Use rounded value of the newly computed texture coordinate. It ensures
// that sampling will remains inside texture boundary
//
// Note for bilinear: by definition it will never work correctly! A sligh modification
// of interpolation migth trigger a discard (with alpha testing)
// Let's use something simple that correct really bad case (for a couple of 2D games).
// I hope it won't create too much glitches.
if (linear) {
int Lu = v[i+1].U - v[i].U;
// Note 32 is based on taisho-mononoke
if ((Lu > 0) && (Lu <= (Lx+32))) {
v[i+1].U -= 8;
}
} else {
if (tx0 <= tx1) {
v[i].U = tx0;
v[i+1].U = tx1 + 16;
} else {
v[i].U = tx0 + 15;
v[i+1].U = tx1;
}
}
#endif
#if 1
if (linear) {
int Lv = v[i+1].V - v[i].V;
if ((Lv > 0) && (Lv <= (Ly+32))) {
v[i+1].V -= 8;
}
} else {
if (ty0 <= ty1) {
v[i].V = ty0;
v[i+1].V = ty1 + 16;
} else {
v[i].V = ty0 + 15;
v[i+1].V = ty1;
}
}
#endif
#ifdef DEBUG_U
if (debug)
fprintf(stderr, "GREP_AFTER %d => %d\n\n", v[i].U, v[i+1].U);
#endif
#ifdef DEBUG_V
if (debug)
fprintf(stderr, "GREP_AFTER %d => %d\n\n", v[i].V, v[i+1].V);
#endif
}
}
void GSRendererHW::Draw()
{
if(m_dev->IsLost() || IsBadFrame()) {
GL_INS("Warning skipping a draw call (%d)", s_n);
return;
}
GL_PUSH("HW Draw %d", s_n);
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
const GSLocalMemory::psm_t& tex_psm = GSLocalMemory::m_psm[m_context->TEX0.PSM];
// Fix TEX0 size
if(PRIM->TME && !IsMipMapActive())
m_context->ComputeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t));
// skip alpha test if possible
// Note: do it first so we know if frame/depth writes are masked
GIFRegTEST TEST = context->TEST;
GIFRegFRAME FRAME = context->FRAME;
GIFRegZBUF ZBUF = context->ZBUF;
uint32 fm = context->FRAME.FBMSK;
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
// Note required to compute TryAlphaTest below. So do it now.
if (PRIM->TME && tex_psm.pal > 0)
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
// Test if we can optimize Alpha Test as a NOP
context->TEST.ATE = context->TEST.ATE && !GSRenderer::TryAlphaTest(fm, zm);
context->FRAME.FBMSK = fm;
context->ZBUF.ZMSK = zm != 0;
// It is allowed to use the depth and rt at the same location. However at least 1 must
// be disabled. Or the written value must be the same on both channels.
// 1/ GoW uses a Cd blending on a 24 bits buffer (no alpha)
// 2/ SuperMan really draws (0,0,0,0) color and a (0) 32-bits depth
// 3/ 50cents really draws (0,0,0,128) color and a (0) 24 bits depth
// Note: FF DoC has both buffer at same location but disable the depth test (write?) with ZTE = 0
const bool no_rt = (context->ALPHA.IsCd() && PRIM->ABE && (context->FRAME.PSM == 1));
const bool no_ds = !no_rt && (
// Depth is always pass/fail (no read) and write are discarded (tekken 5). (Note: DATE is currently implemented with a stencil buffer => a depth/stencil buffer)
(zm != 0 && m_context->TEST.ZTST <= ZTST_ALWAYS && !m_context->TEST.DATE) ||
// Depth will be written through the RT
(context->FRAME.FBP == context->ZBUF.ZBP && !PRIM->TME && zm == 0 && fm == 0 && context->TEST.ZTE)
);
const bool draw_sprite_tex = PRIM->TME && (m_vt.m_primclass == GS_SPRITE_CLASS);
const GSVector4 delta_p = m_vt.m_max.p - m_vt.m_min.p;
bool single_page = (delta_p.x <= 64.0f) && (delta_p.y <= 64.0f);
if (m_channel_shuffle) {
m_channel_shuffle = draw_sprite_tex && (m_context->TEX0.PSM == PSM_PSMT8) && single_page;
if (m_channel_shuffle) {
GL_CACHE("Channel shuffle effect detected SKIP");
return;
}
} else if (draw_sprite_tex && m_context->FRAME.Block() == m_context->TEX0.TBP0) {
// Special post-processing effect
if ((m_context->TEX0.PSM == PSM_PSMT8) && single_page) {
GL_INS("Channel shuffle effect detected");
m_channel_shuffle = true;
} else {
GL_DBG("Special post-processing effect not supported");
m_channel_shuffle = false;
}
} else {
m_channel_shuffle = false;
}
GIFRegTEX0 TEX0;
TEX0.TBP0 = context->FRAME.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->FRAME.PSM;
GSTextureCache::Target* rt = NULL;
GSTexture* rt_tex = NULL;
if (!no_rt) {
rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true, fm);
rt_tex = rt->m_texture;
}
TEX0.TBP0 = context->ZBUF.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->ZBUF.PSM;
GSTextureCache::Target* ds = NULL;
GSTexture* ds_tex = NULL;
if (!no_ds) {
ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite());
ds_tex = ds->m_texture;
}
m_src = nullptr;
m_texture_shuffle = false;
if(PRIM->TME)
{
GIFRegCLAMP MIP_CLAMP = context->CLAMP;
int mxl = std::min<int>((int)m_context->TEX1.MXL, 6);
m_lod = GSVector2i(0, 0);
// Code from the SW renderer
if (IsMipMapActive()) {
int interpolation = (context->TEX1.MMIN & 1) + 1; // 1: round, 2: tri
int k = (m_context->TEX1.K + 8) >> 4;
int lcm = m_context->TEX1.LCM;
if ((int)m_vt.m_lod.x >= mxl) {
k = mxl; // set lod to max level
lcm = 1; // constant lod
}
if (PRIM->FST) {
ASSERT(lcm == 1);
ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu)
lcm = 1;
}
if (lcm == 1) {
m_lod.x = std::max<int>(k, 0);
m_lod.y = m_lod.x;
} else {
// Not constant but who care !
if (interpolation == 2) {
// Mipmap Linear. Both layers are sampled, only take the big one
m_lod.x = std::max<int>((int)floor(m_vt.m_lod.x), 0);
} else {
// On GS lod is a fixed float number 7:4 (4 bit for the frac part)
#if 0
m_lod.x = std::max<int>((int)round(m_vt.m_lod.x + 0.0625), 0);
#else
// Same as above with a bigger margin on rounding
// The goal is to avoid 1 undrawn pixels around the edge which trigger the load of the big
// layer.
if (ceil(m_vt.m_lod.x) < m_vt.m_lod.y)
m_lod.x = std::max<int>((int)round(m_vt.m_lod.x + 0.0625 + 0.01), 0);
else
m_lod.x = std::max<int>((int)round(m_vt.m_lod.x + 0.0625), 0);
#endif
}
m_lod.y = std::max<int>((int)ceil(m_vt.m_lod.y), 0);
}
m_lod.x = std::min<int>(m_lod.x, mxl);
m_lod.y = std::min<int>(m_lod.y, mxl);
TEX0 = GetTex0Layer(m_lod.x);
MIP_CLAMP.MINU >>= m_lod.x;
MIP_CLAMP.MINV >>= m_lod.x;
MIP_CLAMP.MAXU >>= m_lod.x;
MIP_CLAMP.MAXV >>= m_lod.x;
for (int i = 0; i < m_lod.x; i++) {
m_vt.m_min.t *= 0.5f;
m_vt.m_max.t *= 0.5f;
}
GL_CACHE("Mipmap LOD %d %d (%f %f) new size %dx%d (K %d L %u)", m_lod.x, m_lod.y, m_vt.m_lod.x, m_vt.m_lod.y, 1 << TEX0.TW, 1 << TEX0.TH, m_context->TEX1.K, m_context->TEX1.L);
} else {
TEX0 = GetTex0Layer(0);
}
m_context->offset.tex = m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
GSVector4i r;
GetTextureMinMax(r, TEX0, MIP_CLAMP, m_vt.IsLinear());
m_src = tex_psm.depth ? m_tc->LookupDepthSource(TEX0, env.TEXA, r) : m_tc->LookupSource(TEX0, env.TEXA, r);
// Round 2
if (IsMipMapActive() && m_mipmap == 2 && !tex_psm.depth) {
// Upload remaining texture layers
GSVector4 tmin = m_vt.m_min.t;
GSVector4 tmax = m_vt.m_max.t;
for (int layer = m_lod.x + 1; layer <= m_lod.y; layer++) {
const GIFRegTEX0& MIP_TEX0 = GetTex0Layer(layer);
m_context->offset.tex = m_mem.GetOffset(MIP_TEX0.TBP0, MIP_TEX0.TBW, MIP_TEX0.PSM);