Skip to content

Commit

Permalink
[VM][FM7][VRAM] Let gcc use SIMD (when optimization options are set) …
Browse files Browse the repository at this point in the history
…for faster rendering.

[VM][FM7][VRAM] Do not use inline, so that the compiler can optimize.
  • Loading branch information
Artanejp committed Oct 9, 2016
1 parent 60e09c5 commit d100ae8
Show file tree
Hide file tree
Showing 3 changed files with 207 additions and 13 deletions.
14 changes: 14 additions & 0 deletions source/src/vm/fm7/display.cpp
Expand Up @@ -30,6 +30,20 @@ DISPLAY::DISPLAY(VM* parent_vm, EMU* parent_emu) : DEVICE(parent_vm, parent_emu)
mainio = NULL;
subcpu = NULL;
keyboard = NULL;
// Build the bit-expansion lookup tables read by the GETVRAM_* renderers.
// For each byte value i, entry [i][j] describes bit (7 - j) of i, i.e. the
// byte is unpacked MSB first. bit_trans_table_k[i][j] holds (0x80 >> k) when
// that bit is set and 0 otherwise, so entries taken from different tables
// occupy distinct bit positions and can simply be OR-ed together.
for(int i = 0; i < 256; i++) {
// Working copy of the byte value; it is shifted left once per inner
// iteration so that the bit being examined always sits at 0x80.
uint16_t n = (uint16_t)i;
for(int j = 0; j < 8; j++) {
// Table 0 keeps the tested bit in place (result is 0x80 or 0).
bit_trans_table_0[i][j] = n & 0x80;
// Tables 1..3 map the same bit to 0x40, 0x20 and 0x10 respectively.
bit_trans_table_1[i][j] = ((n & 0x80) != 0) ? 0x40 : 0;
bit_trans_table_2[i][j] = ((n & 0x80) != 0) ? 0x20 : 0;
bit_trans_table_3[i][j] = ((n & 0x80) != 0) ? 0x10 : 0;
#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX)
// Tables 4 and 5 (0x08 and 0x04) are only compiled in when one of the
// _FM77AV40* macros is defined.
bit_trans_table_4[i][j] = ((n & 0x80) != 0) ? 0x08 : 0;
bit_trans_table_5[i][j] = ((n & 0x80) != 0) ? 0x04 : 0;
#endif
// Bring the next lower bit of i up to the 0x80 position.
n <<= 1;
}
}
set_device_name(_T("DISPLAY SUBSYSTEM"));
}

Expand Down
20 changes: 15 additions & 5 deletions source/src/vm/fm7/fm7_display.h
Expand Up @@ -19,7 +19,17 @@ class MC6809;

class DISPLAY: public DEVICE
{
protected:
private:

uint16_t bit_trans_table_0[256][8];
uint16_t bit_trans_table_1[256][8];
uint16_t bit_trans_table_2[256][8];
uint16_t bit_trans_table_3[256][8];
#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX)
uint16_t bit_trans_table_4[256][8];
uint16_t bit_trans_table_5[256][8];
#endif
protected:
EMU *p_emu;
VM *p_vm;

Expand Down Expand Up @@ -248,11 +258,11 @@ class DISPLAY: public DEVICE
DEVICE *subcpu;
DEVICE *keyboard;
bool vram_wrote;
inline void GETVRAM_8_200L(int yoff, scrntype_t *p, uint32_t rgbmask, bool window_inv);
inline void GETVRAM_4096(int yoff, scrntype_t *p, uint32_t rgbmask, bool window_inv);
void GETVRAM_8_200L(int yoff, scrntype_t *p, uint32_t rgbmask, bool window_inv);
void GETVRAM_4096(int yoff, scrntype_t *p, uint32_t rgbmask, bool window_inv);
#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX)
inline void GETVRAM_8_400L(int yoff, scrntype_t *p, uint32_t mask, bool window_inv);
inline void GETVRAM_256k(int yoff, scrntype_t *p, uint32_t mask);
void GETVRAM_8_400L(int yoff, scrntype_t *p, uint32_t mask, bool window_inv);
void GETVRAM_256k(int yoff, scrntype_t *p, uint32_t mask);
#endif
uint8_t read_vram_l4_400l(uint32_t addr, uint32_t offset);
uint8_t read_mmio(uint32_t addr);
Expand Down
186 changes: 178 additions & 8 deletions source/src/vm/fm7/vram.cpp
Expand Up @@ -51,7 +51,7 @@ void DISPLAY::write_vram_l4_400l(uint32_t addr, uint32_t offset, uint32_t data)
#endif
}

inline void DISPLAY::GETVRAM_8_200L(int yoff, scrntype_t *p, uint32_t mask,
void DISPLAY::GETVRAM_8_200L(int yoff, scrntype_t *p, uint32_t mask,
bool window_inv = false)
{
register uint8_t b, r, g;
Expand Down Expand Up @@ -81,7 +81,21 @@ inline void DISPLAY::GETVRAM_8_200L(int yoff, scrntype_t *p, uint32_t mask,
if(mask & 0x01) b = gvram_shadow[yoff_d + 0x00000];
if(mask & 0x02) r = gvram_shadow[yoff_d + 0x04000];
if(mask & 0x04) g = gvram_shadow[yoff_d + 0x08000];

#if 1
uint16_t *pg = &(bit_trans_table_0[g][0]);
uint16_t *pr = &(bit_trans_table_1[r][0]);
uint16_t *pb = &(bit_trans_table_2[b][0]);
uint16_t tmp_d[8];
for(int i = 0; i < 8; i++) {
tmp_d[i] = pg[i] | pr[i] | pb[i];
}
for(int i = 0; i < 8; i++) {
tmp_d[i] = tmp_d[i] >> 5;
}
for(int i = 0; i < 8; i++) {
p[i] = dpalette_pixel[tmp_d[i]];
}
#else
dot = ((g & 0x80) >> 5) | ((r & 0x80) >> 6) | ((b & 0x80) >> 7);
p[0] = dpalette_pixel[dot];
dot = ((g & 0x40) >> 4) | ((r & 0x40) >> 5) | ((b & 0x40) >> 6);
Expand All @@ -99,11 +113,12 @@ inline void DISPLAY::GETVRAM_8_200L(int yoff, scrntype_t *p, uint32_t mask,
p[6] = dpalette_pixel[dot];
dot = ((g & 0x1) << 2) | ((r & 0x1) << 1) | (b & 0x1);
p[7] = dpalette_pixel[dot];
#endif
}

#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX)
inline void DISPLAY::GETVRAM_8_400L(int yoff, scrntype_t *p, uint32_t mask,
bool window_inv = false)
void DISPLAY::GETVRAM_8_400L(int yoff, scrntype_t *p, uint32_t mask,
bool window_inv = false)
{
register uint8_t b, r, g;
register uint32_t dot;
Expand All @@ -127,7 +142,21 @@ inline void DISPLAY::GETVRAM_8_400L(int yoff, scrntype_t *p, uint32_t mask,
if(mask & 0x01) b = gvram_shadow[yoff_d + 0x00000];
if(mask & 0x02) r = gvram_shadow[yoff_d + 0x08000];
if(mask & 0x04) g = gvram_shadow[yoff_d + 0x10000];

#if 1
uint16_t *pg = &(bit_trans_table_0[g][0]);
uint16_t *pr = &(bit_trans_table_1[r][0]);
uint16_t *pb = &(bit_trans_table_2[b][0]);
uint16_t tmp_d[8];
for(int i = 0; i < 8; i++) {
tmp_d[i] = pg[i] | pr[i] | pb[i];
}
for(int i = 0; i < 8; i++) {
tmp_d[i] = tmp_d[i] >> 5;
}
for(int i = 0; i < 8; i++) {
p[i] = dpalette_pixel[tmp_d[i]];
}
#else
dot = ((g & 0x80) >> 5) | ((r & 0x80) >> 6) | ((b & 0x80) >> 7);
p[0] = dpalette_pixel[dot];
dot = ((g & 0x40) >> 4) | ((r & 0x40) >> 5) | ((b & 0x40) >> 6);
Expand All @@ -145,9 +174,10 @@ inline void DISPLAY::GETVRAM_8_400L(int yoff, scrntype_t *p, uint32_t mask,
p[6] = dpalette_pixel[dot];
dot = ((g & 0x1) << 2) | ((r & 0x1) << 1) | (b & 0x1);
p[7] = dpalette_pixel[dot];
#endif
}

inline void DISPLAY::GETVRAM_256k(int yoff, scrntype_t *p, uint32_t mask)
void DISPLAY::GETVRAM_256k(int yoff, scrntype_t *p, uint32_t mask)
{
register uint32_t b3, r3, g3;
register uint32_t b4, r4, g4;
Expand All @@ -168,6 +198,90 @@ inline void DISPLAY::GETVRAM_256k(int yoff, scrntype_t *p, uint32_t mask)

yoff_d1 = yoff;
yoff_d2 = yoff;
#if 1
uint8_t bb[8], rr[8], gg[8];
uint16_t *p0, *p1, *p2, *p3, *p4, *p5;
uint32_t _btmp[8], _rtmp[8], _gtmp[8];
if(mask & 0x01) {
// B
bb[0] = gvram_shadow[yoff_d1];
bb[1] = gvram_shadow[yoff_d1 + 0x02000];

bb[2] = gvram_shadow[yoff_d2 + 0x0c000];
bb[3] = gvram_shadow[yoff_d2 + 0x0e000];

bb[4] = gvram_shadow[yoff_d1 + 0x18000];
bb[5] = gvram_shadow[yoff_d1 + 0x1a000];

p0 = &(bit_trans_table_0[bb[0]][0]);
p1 = &(bit_trans_table_1[bb[1]][0]);
p2 = &(bit_trans_table_2[bb[2]][0]);
p3 = &(bit_trans_table_3[bb[3]][0]);
p4 = &(bit_trans_table_4[bb[4]][0]);
p5 = &(bit_trans_table_5[bb[5]][0]);
for(int i = 0; i < 8; i++) {
_btmp[i] = p0[i] | p1[i] | p2[i] | p3[i] | p4[i] | p5[i];
}
} else {
for(int i = 0; i < 8; i++) {
_btmp[i] = 0;
}
}
if(mask & 0x02) {
// R
rr[0] = gvram_shadow[yoff_d1 + 0x04000];
rr[1] = gvram_shadow[yoff_d1 + 0x06000];

rr[2] = gvram_shadow[yoff_d2 + 0x10000];
rr[3] = gvram_shadow[yoff_d2 + 0x12000];

rr[4] = gvram_shadow[yoff_d1 + 0x1c000];
rr[5] = gvram_shadow[yoff_d1 + 0x1e000];

p0 = &(bit_trans_table_0[rr[0]][0]);
p1 = &(bit_trans_table_1[rr[1]][0]);
p2 = &(bit_trans_table_2[rr[2]][0]);
p3 = &(bit_trans_table_3[rr[3]][0]);
p4 = &(bit_trans_table_4[rr[4]][0]);
p5 = &(bit_trans_table_5[rr[5]][0]);
for(int i = 0; i < 8; i++) {
_rtmp[i] = p0[i] | p1[i] | p2[i] | p3[i] | p4[i] | p5[i];
}
} else {
for(int i = 0; i < 8; i++) {
_rtmp[i] = 0;
}
}
if(mask & 0x04) {
// G
gg[0] = gvram_shadow[yoff_d1 + 0x08000];
gg[1] = gvram_shadow[yoff_d1 + 0x0a000];

gg[2] = gvram_shadow[yoff_d2 + 0x14000];
gg[3] = gvram_shadow[yoff_d2 + 0x16000];

gg[4] = gvram_shadow[yoff_d1 + 0x20000];
gg[5] = gvram_shadow[yoff_d1 + 0x22000];

p0 = &(bit_trans_table_0[gg[0]][0]);
p1 = &(bit_trans_table_1[gg[1]][0]);
p2 = &(bit_trans_table_2[gg[2]][0]);
p3 = &(bit_trans_table_3[gg[3]][0]);
p4 = &(bit_trans_table_4[gg[4]][0]);
p5 = &(bit_trans_table_5[gg[5]][0]);
for(int i = 0; i < 8; i++) {
_gtmp[i] = p0[i] | p1[i] | p2[i] | p3[i] | p4[i] | p5[i];
}
} else {
for(int i = 0; i < 8; i++) {
_gtmp[i] = 0;
}
}
for(int i = 0; i < 8; i++) {
p[i] = RGB_COLOR(_rtmp[i], _gtmp[i], _btmp[i]);
}

#else
if(mask & 0x01) {
b3 = gvram_shadow[yoff_d1] << 24;
b3 |= gvram_shadow[yoff_d1 + 0x02000] << 16;
Expand Down Expand Up @@ -228,15 +342,18 @@ inline void DISPLAY::GETVRAM_256k(int yoff, scrntype_t *p, uint32_t mask)
//p[cp + 1] = pixel;
cp += 1;
}

#endif
}
#endif

#if defined(_FM77AV_VARIANTS)
inline void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, uint32_t mask,
void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, uint32_t mask,
bool window_inv = false)
{
uint32_t b3, r3, g3;
uint8_t bb[4], rr[4], gg[4];
uint16_t pixels[8];

scrntype_t b, r, g;
uint32_t idx;;
scrntype_t pixel;
Expand All @@ -262,7 +379,59 @@ inline void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, uint32_t mask,
yoff_d2 += 0x18000;
}
# endif
#if 1
bb[0] = gvram_shadow[yoff_d1];
bb[1] = gvram_shadow[yoff_d1 + 0x02000];
rr[0] = gvram_shadow[yoff_d1 + 0x04000];
rr[1] = gvram_shadow[yoff_d1 + 0x06000];

gg[0] = gvram_shadow[yoff_d1 + 0x08000];
gg[1] = gvram_shadow[yoff_d1 + 0x0a000];

bb[2] = gvram_shadow[yoff_d2 + 0x0c000];
bb[3] = gvram_shadow[yoff_d2 + 0x0e000];

rr[2] = gvram_shadow[yoff_d2 + 0x10000];
rr[3] = gvram_shadow[yoff_d2 + 0x12000];
gg[2] = gvram_shadow[yoff_d2 + 0x14000];
gg[3] = gvram_shadow[yoff_d2 + 0x16000];

uint16_t tmp_g[8], tmp_r[8], tmp_b[8];
uint16_t *p0, *p1, *p2, *p3;
// G
p0 = &(bit_trans_table_0[gg[0]][0]);
p1 = &(bit_trans_table_1[gg[1]][0]);
p2 = &(bit_trans_table_2[gg[2]][0]);
p3 = &(bit_trans_table_3[gg[3]][0]);
for(int i = 0; i < 8; i++) {
tmp_g[i] = p0[i] | p1[i] | p2[i] | p3[i];
}
// R
p0 = &(bit_trans_table_0[rr[0]][0]);
p1 = &(bit_trans_table_1[rr[1]][0]);
p2 = &(bit_trans_table_2[rr[2]][0]);
p3 = &(bit_trans_table_3[rr[3]][0]);
for(int i = 0; i < 8; i++) {
tmp_r[i] = p0[i] | p1[i] | p2[i] | p3[i];
}
// B
p0 = &(bit_trans_table_0[bb[0]][0]);
p1 = &(bit_trans_table_1[bb[1]][0]);
p2 = &(bit_trans_table_2[bb[2]][0]);
p3 = &(bit_trans_table_3[bb[3]][0]);
for(int i = 0; i < 8; i++) {
tmp_b[i] = p0[i] | p1[i] | p2[i] | p3[i];
}
for(int i = 0; i < 8; i++) {
pixels[i] = (tmp_g[i] * 16) | tmp_r[i] | (tmp_b[i] / 16);
}
for(int i = 0; i < 8; i++) {
pixels[i] = pixels[i] & mask;
}
for(int i = 0; i < 8; i++) {
p[i] = analog_palette_pixel[pixels[i]];
}
#else
b3 = gvram_shadow[yoff_d1] << 24;
b3 |= gvram_shadow[yoff_d1 + 0x02000] << 16;
r3 = gvram_shadow[yoff_d1 + 0x04000] << 24;
Expand Down Expand Up @@ -350,6 +519,7 @@ inline void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, uint32_t mask,
idx = (g | b | r ) & mask;
pixel = analog_palette_pixel[idx];
p[7] = pixel;
#endif
//p[15] = pixel;
}
#endif
Expand Down

0 comments on commit d100ae8

Please sign in to comment.