Skip to content

Commit

Permalink
pcsx2: Implemented Threaded VU1 :D
Browse files Browse the repository at this point in the history
Threading VU1 took a lot of rewrites and new code to make possible (MTGS, microVU, gifUnit...), but we finally got to the point where it was feasible, and now we've done it! (So now everyone can stop complaining that pcsx2 only takes advantage of 2 cores :p).

The speedups in the games that benefit from it are great if you have a CPU with 3+ cores (generally a 10~45% speedup); however, games that are GS-limited can see a slowdown (especially on dual-core CPUs).

The option can be found in the speedhacks section as "MTVU (Multi-Threaded microVU1)". When enabled, it should show the VU thread-time percentage in the window title bar (like we currently do for the EE/GS/UI threads).

It is listed as a speedhack because, in order for threading VU1 to be a speedup, we need to assume that games will not send GIF packets containing Signal/Finish/Label commands from path 1 (VU1's XGKICK). The good news is that very few games ever do this, so the compatibility of MTVU is very high (a game that does do this will likely hang).

Note: vs2010 builds and Linux builds need to be updated to include "MTVU.h" and "MTVU.cpp".


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4865 96395faa-99c1-11dd-bbfe-3dabce05a288
  • Loading branch information
cottonvibes committed Aug 12, 2011
1 parent 60cec5a commit ac9bf45
Show file tree
Hide file tree
Showing 60 changed files with 1,180 additions and 428 deletions.
6 changes: 4 additions & 2 deletions common/include/Utilities/PageFaultSource.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,8 @@ class SpatialArrayReserve : public BaseVmReserveListener
struct _EXCEPTION_POINTERS;
extern int SysPageFaultExceptionFilter(struct _EXCEPTION_POINTERS* eps);

# define PCSX2_PAGEFAULT_PROTECT __try
# define PCSX2_PAGEFAULT_EXCEPT __except(SysPageFaultExceptionFilter(GetExceptionInformation())) {}
# define PCSX2_PAGEFAULT_PROTECT __try
# define PCSX2_PAGEFAULT_EXCEPT __except(SysPageFaultExceptionFilter(GetExceptionInformation())) {}

#else
# error PCSX2 - Unsupported operating system platform.
Expand All @@ -352,5 +352,7 @@ extern int SysPageFaultExceptionFilter(struct _EXCEPTION_POINTERS* eps);
extern void pxInstallSignalHandler();
extern void _platform_InstallSignalHandler();

#include "Threading.h"
extern SrcType_PageFault* Source_PageFault;
extern Threading::Mutex PageFault_Mutex;

38 changes: 35 additions & 3 deletions common/include/Utilities/Threading.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,17 +179,20 @@ namespace Threading
// from these little beasties! (these are all implemented internally using cross-platform
// implementations of _InterlockedExchange and such)

extern u32 AtomicRead( volatile u32& Target );
extern s32 AtomicRead( volatile s32& Target );
extern u32 AtomicExchange( volatile u32& Target, u32 value );
extern u32 AtomicExchangeAdd( volatile u32& Target, u32 value );
extern u32 AtomicIncrement( volatile u32& Target );
extern u32 AtomicDecrement( volatile u32& Target );
extern s32 AtomicExchange( volatile s32& Target, s32 value );
extern u32 AtomicExchangeAdd( volatile u32& Target, u32 value );
extern s32 AtomicExchangeAdd( volatile s32& Target, s32 value );
extern s32 AtomicExchangeSub( volatile s32& Target, s32 value );
extern u32 AtomicIncrement( volatile u32& Target );
extern s32 AtomicIncrement( volatile s32& Target );
extern u32 AtomicDecrement( volatile u32& Target );
extern s32 AtomicDecrement( volatile s32& Target );

extern bool AtomicBitTestAndReset( volatile u32& bitset, u8 bit );
extern bool AtomicBitTestAndReset( volatile s32& bitset, u8 bit );

extern void* _AtomicExchangePointer( volatile uptr& target, uptr value );
extern void* _AtomicCompareExchangePointer( volatile uptr& target, uptr value, uptr comparand );
Expand Down Expand Up @@ -393,5 +396,34 @@ namespace Threading

bool Failed() const { return !m_IsLocked; }
};

// --------------------------------------------------------------------------------------
// ScopedLockBool
// --------------------------------------------------------------------------------------
// A ScopedLock in which you specify an external bool to get updated on locks/unlocks.
// Note that the isLockedBool should only be used as an indicator for the locked status,
// and not actually depended on for thread synchronization...

// Pairs a ScopedLock with a caller-owned bool that is updated to mirror the lock's status.
// The bool is written with plain (non-atomic) assignments; treat it purely as a status
// indicator and never as a synchronization primitive.
struct ScopedLockBool {
// The underlying scoped lock; every lock/unlock operation is forwarded to it.
ScopedLock m_lock;
// Reference to the caller's status flag; the referenced variable must outlive this object.
volatile __aligned(4) bool& m_bool;

// Constructs m_lock from mutexToLock, then stores whatever m_lock.IsLocked() reports
// into the caller's flag.
//   mutexToLock  - mutex handed to the ScopedLock member.
//   isLockedBool - caller-owned flag that receives the locked status.
ScopedLockBool(Mutex& mutexToLock, volatile __aligned(4) bool& isLockedBool)
: m_lock(mutexToLock),
m_bool(isLockedBool) {
m_bool = m_lock.IsLocked();
}
// Clears the flag. The m_lock member is destroyed after this body runs (normal C++
// member-destruction order), so the flag is cleared first; presumably ScopedLock's
// destructor is what releases the mutex -- confirm against ScopedLock.
// NOTE(review): the destructor is virtual although no other virtual members are visible
// in this struct -- confirm whether that is intentional.
virtual ~ScopedLockBool() throw() {
m_bool = false;
}
// Forwards to m_lock.Acquire(), then refreshes the flag from m_lock.IsLocked().
// The flag is only set after the acquire call has returned.
void Acquire() {
m_lock.Acquire();
m_bool = m_lock.IsLocked();
}
// Clears the flag before forwarding to m_lock.Release(), so the flag is already
// false by the time the release call is made.
void Release() {
m_bool = false;
m_lock.Release();
}
};
}

8 changes: 5 additions & 3 deletions common/include/x86emitter/x86types.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,12 @@ enum XMMSSEType
// as a project option. The multithreaded emitter relies on native compiler support for
// TLS -- Macs are crap out of luck there (for now).

#include "Utilities/Threading.h"

#ifndef x86EMIT_MULTITHREADED
# define x86EMIT_MULTITHREADED 0
#else
# if !PCSX2_THREAD_LOCAL
# if PCSX2_THREAD_LOCAL
# define x86EMIT_MULTITHREADED 1
# else
// No TLS support? Force-clear the MT flag:
# pragma message("x86emitter: TLS not available, multithreaded emitter disabled.")
# undef x86EMIT_MULTITHREADED
Expand Down
6 changes: 6 additions & 0 deletions common/src/Utilities/Linux/LnxHostSys.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ static void SysPageFaultSignalFilter( int signal, siginfo_t *siginfo, void * )
// Note: Use of stdio functions isn't safe here. Avoid console logs,
// assertions, file logs, or just about anything else useful.


// Note: This signal can be accessed by the EE or MTVU thread
// Source_PageFault is a global variable with its own state information
// so for now we lock this exception code unless someone can fix this better...
Threading::ScopedLock lock(PageFault_Mutex);

Source_PageFault->Dispatch( PageFaultInfo( (uptr)siginfo->si_addr & ~m_pagemask ) );

// resumes execution right where we left off (re-executes instruction that
Expand Down
66 changes: 32 additions & 34 deletions common/src/Utilities/ThreadTools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -786,72 +786,70 @@ void Threading::WaitEvent::Wait()
// InterlockedExchanges / AtomicExchanges (PCSX2's Helper versions)
// --------------------------------------------------------------------------------------
// define some overloads for InterlockedExchanges for commonly used types, like u32 and s32.
// Note: For all of these atomic operations below to be atomic, the variables need to be 4-byte
// aligned. Read: http://msdn.microsoft.com/en-us/library/ms684122%28v=vs.85%29.aspx

__fi bool Threading::AtomicBitTestAndReset( volatile u32& bitset, u8 bit )
{
return _interlockedbittestandreset( (volatile long*)& bitset, bit ) != 0;
__fi u32 Threading::AtomicRead(volatile u32& Target) {
return Target; // Properly-aligned 32-bit reads are atomic
}

__fi u32 Threading::AtomicExchange( volatile u32& Target, u32 value )
{
return _InterlockedExchange( (volatile long*)&Target, value );
__fi s32 Threading::AtomicRead(volatile s32& Target) {
return Target; // Properly-aligned 32-bit reads are atomic
}

__fi u32 Threading::AtomicExchangeAdd( volatile u32& Target, u32 value )
{
return _InterlockedExchangeAdd( (volatile long*)&Target, value );
__fi bool Threading::AtomicBitTestAndReset( volatile u32& bitset, u8 bit ) {
return _interlockedbittestandreset( (volatile long*)& bitset, bit ) != 0;
}

__fi u32 Threading::AtomicIncrement( volatile u32& Target )
{
return _InterlockedExchangeAdd( (volatile long*)&Target, 1 );
__fi bool Threading::AtomicBitTestAndReset( volatile s32& bitset, u8 bit ) {
return _interlockedbittestandreset( (volatile long*)& bitset, bit ) != 0;
}

__fi u32 Threading::AtomicDecrement( volatile u32& Target )
{
return _InterlockedExchangeAdd( (volatile long*)&Target, -1 );
__fi u32 Threading::AtomicExchange(volatile u32& Target, u32 value ) {
return _InterlockedExchange( (volatile long*)&Target, value );
}

__fi s32 Threading::AtomicExchange( volatile s32& Target, s32 value )
{
__fi s32 Threading::AtomicExchange( volatile s32& Target, s32 value ) {
return _InterlockedExchange( (volatile long*)&Target, value );
}

__fi s32 Threading::AtomicExchangeAdd( volatile s32& Target, s32 value )
{
__fi u32 Threading::AtomicExchangeAdd( volatile u32& Target, u32 value ) {
return _InterlockedExchangeAdd( (volatile long*)&Target, value );
}
__fi s32 Threading::AtomicExchangeAdd( volatile s32& Target, s32 value ) {
return _InterlockedExchangeAdd( (volatile long*)&Target, value );
}

__fi s32 Threading::AtomicExchangeSub( volatile s32& Target, s32 value )
{
__fi s32 Threading::AtomicExchangeSub( volatile s32& Target, s32 value ) {
return _InterlockedExchangeAdd( (volatile long*)&Target, -value );
}

__fi s32 Threading::AtomicIncrement( volatile s32& Target )
{
__fi u32 Threading::AtomicIncrement( volatile u32& Target ) {
return _InterlockedExchangeAdd( (volatile long*)&Target, 1 );
}
__fi s32 Threading::AtomicIncrement( volatile s32& Target) {
return _InterlockedExchangeAdd( (volatile long*)&Target, 1 );
}

__fi s32 Threading::AtomicDecrement( volatile s32& Target )
{
__fi u32 Threading::AtomicDecrement( volatile u32& Target ) {
return _InterlockedExchangeAdd( (volatile long*)&Target, -1 );
}
__fi s32 Threading::AtomicDecrement(volatile s32& Target) {
return _InterlockedExchangeAdd((volatile long*)&Target, -1);
}

__fi void* Threading::_AtomicExchangePointer( volatile uptr& target, uptr value )
__fi void* Threading::_AtomicExchangePointer(volatile uptr& target, uptr value)
{
#ifdef _M_AMD64 // high-level atomic ops, please leave these 64 bit checks in place.
return (void*)_InterlockedExchange64( &(volatile s64&)target, value );
return (void*)_InterlockedExchange64(&(volatile s64&)target, value);
#else
return (void*)_InterlockedExchange( (volatile long*)&target, value );
return (void*)_InterlockedExchange((volatile long*)&target, value);
#endif
}

__fi void* Threading::_AtomicCompareExchangePointer( volatile uptr& target, uptr value, uptr comparand )
__fi void* Threading::_AtomicCompareExchangePointer(volatile uptr& target, uptr value, uptr comparand)
{
#ifdef _M_AMD64 // high-level atomic ops, please leave these 64 bit checks in place.
return (void*)_InterlockedCompareExchange64( &(volatile s64&)target, value );
return (void*)_InterlockedCompareExchange64(&(volatile s64&)target, value);
#else
return (void*)_InterlockedCompareExchange( &(volatile long&)target, value, comparand );
return (void*)_InterlockedCompareExchange(&(volatile long&)target, value, comparand);
#endif
}

Expand Down
4 changes: 2 additions & 2 deletions common/src/Utilities/VirtualMemory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@
template class EventSource< IEventListener_PageFault >;

SrcType_PageFault* Source_PageFault = NULL;
Threading::Mutex PageFault_Mutex;

void pxInstallSignalHandler()
{
if (!Source_PageFault)
{
if(!Source_PageFault) {
Source_PageFault = new SrcType_PageFault();
}

Expand Down
4 changes: 4 additions & 0 deletions common/src/Utilities/Windows/WinHostSys.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ int SysPageFaultExceptionFilter( EXCEPTION_POINTERS* eps )
if( eps->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION )
return EXCEPTION_CONTINUE_SEARCH;

// Note: This exception can be accessed by the EE or MTVU thread
// Source_PageFault is a global variable with its own state information
// so for now we lock this exception code unless someone can fix this better...
Threading::ScopedLock lock(PageFault_Mutex);
Source_PageFault->Dispatch( PageFaultInfo( (uptr)eps->ExceptionRecord->ExceptionInformation[1] ) );
return Source_PageFault->WasHandled() ? EXCEPTION_CONTINUE_EXECUTION : EXCEPTION_CONTINUE_SEARCH;
}
Expand Down
4 changes: 3 additions & 1 deletion pcsx2/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,8 @@ struct Pcsx2Config
IntcStat :1, // tells Pcsx2 to fast-forward through intc_stat waits.
WaitLoop :1, // enables constant loop detection and fast-forwarding
vuFlagHack :1, // microVU specific flag hack
vuBlockHack :1; // microVU specific block flag no-propagation hack
vuBlockHack :1, // microVU specific block flag no-propagation hack
vuThread :1; // Enable Threaded VU1
BITFIELD_END

u8 EECycleRate; // EE cycle rate selector (1.0, 1.5, 2.0)
Expand Down Expand Up @@ -471,6 +472,7 @@ TraceLogFilters& SetTraceConfig();

// ------------ CPU / Recompiler Options ---------------

#define THREAD_VU1 (EmuConfig.Cpu.Recompiler.UseMicroVU1 && EmuConfig.Speedhacks.vuThread)
#define CHECK_MICROVU0 (EmuConfig.Cpu.Recompiler.UseMicroVU0)
#define CHECK_MICROVU1 (EmuConfig.Cpu.Recompiler.UseMicroVU1)
#define CHECK_EEREC (EmuConfig.Cpu.Recompiler.EnableEE && GetCpuProviders().IsRecAvailable_EE())
Expand Down
3 changes: 1 addition & 2 deletions pcsx2/FiFo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@
#include "PrecompiledHeader.h"
#include "Common.h"

#include "Gif.h"
#include "Gif_Unit.h"
#include "GS.h"
#include "Gif_Unit.h"
#include "Vif.h"
#include "Vif_Dma.h"
#include "IPU/IPU.h"
Expand Down
1 change: 0 additions & 1 deletion pcsx2/GS.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include <list>

#include "GS.h"
#include "Gif.h"
#include "Gif_Unit.h"
#include "Counters.h"

Expand Down
12 changes: 7 additions & 5 deletions pcsx2/GS.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ enum MTGS_RingCommand
, GS_RINGTYPE_MODECHANGE // for issued mode changes.
, GS_RINGTYPE_CRC
, GS_RINGTYPE_GSPACKET
, GS_RINGTYPE_MTVU_GSPACKET
};


Expand All @@ -263,8 +264,8 @@ class SysMtgsThread : public SysThreadBase

public:
// note: when m_ReadPos == m_WritePos, the fifo is empty
uint m_ReadPos; // cur pos gs is reading from
uint m_WritePos; // cur pos ee thread is writing to
__aligned(4) uint m_ReadPos; // cur pos gs is reading from
__aligned(4) uint m_WritePos; // cur pos ee thread is writing to

volatile bool m_RingBufferIsBusy;
volatile u32 m_SignalRingEnable;
Expand All @@ -273,7 +274,9 @@ class SysMtgsThread : public SysThreadBase
volatile s32 m_QueuedFrameCount;
volatile u32 m_VsyncSignalListener;

Mutex m_mtx_RingBufferBusy;
Mutex m_mtx_RingBufferBusy; // Is obtained while processing ring-buffer data
Mutex m_mtx_RingBufferBusy2; // This one gets released on semaXGkick waiting...
Mutex m_mtx_WaitGS;
Semaphore m_sem_OnRingReset;
Semaphore m_sem_Vsync;

Expand Down Expand Up @@ -304,8 +307,7 @@ class SysMtgsThread : public SysThreadBase
virtual ~SysMtgsThread() throw();

// Waits for the GS to empty out the entire ring buffer contents.
// Used primarily for plugin startup/shutdown.
void WaitGS();
void WaitGS(bool syncRegs=true, bool weakWait=false, bool isMTVU=false);
void ResetGS();

void PrepDataPacket( MTGS_RingCommand cmd, u32 size );
Expand Down
2 changes: 1 addition & 1 deletion pcsx2/Gif.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#include "Common.h"

#include "GS.h"
#include "Gif.h"
#include "Gif_Unit.h"
#include "Vif_Dma.h"

Expand Down Expand Up @@ -87,6 +86,7 @@ __fi void gifInterrupt()
}

static u32 WRITERING_DMA(u32 *pMem, u32 qwc) {
//qwc = min(qwc, 1024u);
uint size = gifUnit.TransferGSPacketData(GIF_TRANS_DMA, (u8*)pMem, qwc*16) / 16;
incGifChAddr(size);
return size;
Expand Down
12 changes: 7 additions & 5 deletions pcsx2/Gif.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,17 @@ enum GIF_PATH {
enum GIF_TRANSFER_TYPE {
GIF_TRANS_INVALID = 0x000, // Invalid
GIF_TRANS_XGKICK = 0x100, // Path 1
GIF_TRANS_DIRECT = 0x201, // Path 2
GIF_TRANS_DIRECTHL = 0x301, // Path 2
GIF_TRANS_DMA = 0x402, // Path 3
GIF_TRANS_FIFO = 0x502 // Path 3
GIF_TRANS_MTVU = 0x200, // Path 1
GIF_TRANS_DIRECT = 0x301, // Path 2
GIF_TRANS_DIRECTHL = 0x401, // Path 2
GIF_TRANS_DMA = 0x502, // Path 3
GIF_TRANS_FIFO = 0x602 // Path 3
};

static const char Gif_TransferStr[6][32] = {
static const char Gif_TransferStr[7][32] = {
"Invalid Transfer Type",
"GIF_TRANS_XGKICK",
"GIF_TRANS_MTVU",
"GIF_TRANS_DIRECT",
"GIF_TRANS_DIRECTHL",
"GIF_TRANS_DMA",
Expand Down
1 change: 0 additions & 1 deletion pcsx2/Gif_Logger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

#include "PrecompiledHeader.h"
#include "Common.h"
#include "Gif.h"
#include "Gif_Unit.h"

#define GIF_PARSE DevCon.WriteLn
Expand Down
Loading

0 comments on commit ac9bf45

Please sign in to comment.