MCOC115/asmsrc/mcvm_lib_math.incl

// ================================
// Moscovium
//		hfpu mathematical function library
//		(c) 2021	1YEN Toru
//
//		2021/07/10	ver.1.02
//			using hcmp instruction
//
//		2021/07/03	ver.1.00
//			math_atan: half r0=atan (half r1), disturbed r1~r3
//			math_exp: half r0=exp (half r1), disturbed r1~r5
//			math_log: half r0=log (half r1), disturbed r1~r4
//			math_sin: half r0=sin (half r1), disturbed r1~r4
//			math_abs: half r0=abs (half r1), disturbed r1
//			math_ceil: half r0=ceil (half r1), disturbed r2
//			math_floor: half r0=floor (half r1), disturbed r1,r2
//			math_round: half r0=round (half r1), disturbed r1,r2
//
// ================================


// ================================
// math_atan
//		in:		r1 = x (half)
//		out:	r0 = atan (x) (half)
//		disturbed r1~r3
// ================================
math_atan:
// r3 collects the fix-ups that are undone at the end:
//		bit0 (1) = x was negative, bit1 (2) = reciprocal was taken
ldbiu	r3,0
// step 1: continue with abs (x)
mov		r1,r1
bpl		math_atan_abs_ok
hneg	r1,r1
ori		r3,1
math_atan_abs_ok:
// step 2: unless abs (x)<1, continue with t=1/abs (x);
// the result is mirrored later with atan (x)=pi/2 - atan (1/x), x>0
hldi	r2,1
hcmp	r1,r2
bmi		math_atan_small
hdiv	r2,r1
mov		r1,r2
ori		r3,2
math_atan_small:

// step 3: continued fraction in t=r1
//		atan (t)=t/y[1], y[n]=(2n - 1) + (n*t)^2/y[n + 1]
//		r2 holds y[n + 1] on entry to each stage and y[n] on exit
// y[4]: constant that closes the fraction
hldi	r2,8.470482158
// y[3]=5 + (3*t)^2/y[4]
hldi	r0,3
hmul	r0,r1
hmul	r0,r0
hdiv	r0,r2
hldi	r2,2*3-1
hadd	r2,r0
// y[2]=3 + (2*t)^2/y[3]
hldi	r0,2
hmul	r0,r1
hmul	r0,r0
hdiv	r0,r2
hldi	r2,2*2-1
hadd	r2,r0
// y[1]=1 + t^2/y[2]
mov		r0,r1
hmul	r0,r0
hdiv	r0,r2
hldi	r2,2*1-1
hadd	r2,r0
// r0=t/y[1]
mov		r0,r1
hdiv	r0,r2

// step 4: undo the reduction, reciprocal first, sign second
mov		r2,r3
andi	r2,2
beq		math_atan_no_recip
hldi	r2,3.1415926535/2
hsub	r2,r0
mov		r0,r2
math_atan_no_recip:
andi	r3,1
beq		math_atan_done
hneg	r0,r0

math_atan_done:
rtnw


// ================================
// half r0=exp (half r1), disturbed r1~r5
//		method: exp (x)=exp (k/2)*exp (fx) with k=int (2*abs (x)), fx=abs (x) - k/2
//		exp (fx) is a Taylor sum up to n=4, exp (k/2) is read from math_exp_tab
//		for x<0 the reciprocal of that product is returned
//		three words are pushed and popped again inside the routine
// ================================
math_exp:
// if (r1<0) { r1=-r1; r5=1 }	exp (-x)=1/exp (x) is applied at the end
ldbiu	r5,0							// 1=inverse / 0=not
mov		r1,r1
// non-negative input skips the negate and the flag update
bpl		pcnt+4
hneg	r1,r1
ori		r5,1

// r1=int (x*2)/2 + fx ==> r1=fx; r4=int (x*2)
// r0 keeps the constant 2 as divisor for r2 below
hldi	r4,2
mov		r0,r4
hmul	r4,r1
huint	r4,r4
hhalf	r2,r4
hdiv	r2,r0
hsub	r1,r2

// r0=exp (r1)=sigma (r1^n/n!)
// from here: r2=fx, r1=running power fx^n, r3=current term, r0=sum
mov		r2,r1

// n=0
hldi	r1,1
hldi	r0,1
// n=1
hmul	r1,r2
hldi	r3,1./(1)
hmul	r3,r1
pushw	r3
// n=2
hmul	r1,r2
hldi	r3,1./(2*1)
hmul	r3,r1
pushw	r3
// n=3
hmul	r1,r2
hldi	r3,1./(3*2*1)
hmul	r3,r1
pushw	r3
// n=4
hmul	r1,r2
hldi	r3,1./(4*3*2*1)
hmul	r3,r1

// sigma
// the n=4 term is still in r3; the other terms come back from the
// stack in reverse push order, so the sum runs n=4, 3, 2, 1
hadd	r0,r3							// n=4
popw	r3
hadd	r0,r3							// n=3
popw	r3
hadd	r0,r3							// n=2
popw	r3
hadd	r0,r3							// n=1

// exp (r4/2)
// r4 below 23 is looked up in the table, anything else uses half_inf as factor
cmpi	r4,23
blo		math_exp_lt_23
ldwi	r4,half_inf
bra		math_exp_endif

math_exp_lt_23:
// r2=table base + 2*r4, then r4=word at that address
// NOTE(review): the operand is spelled lab_math_exp_tab while the table label
// below is math_exp_tab; presumably the assembler exposes label addresses
// under a lab_ prefix - confirm
ldwi	r2,lab_math_exp_tab
add		r2,r4
add		r2,r4
ldw		r4,[r2]
math_exp_endif:
hmul	r0,r4

// inverse
// r5==0 skips the reciprocal and goes straight to rtnw
mov		r5,r5
beq		pcnt+8
hldi	r2,1
hdiv	r2,r0
mov		r0,r2

rtnw

// table of exp (k/2), k=0..23, one dath entry per k
// NOTE(review): the k=23 entry looks unreachable, because r4>=23 takes the
// half_inf path above - confirm before removing it
math_exp_tab:
dath	exp(0)
dath	exp(0.5)
dath	exp(1)
dath	exp(1.5)
dath	exp(2)
dath	exp(2.5)
dath	exp(3)
dath	exp(3.5)
dath	exp(4)
dath	exp(4.5)
dath	exp(5)
dath	exp(5.5)
dath	exp(6)
dath	exp(6.5)
dath	exp(7)
dath	exp(7.5)
dath	exp(8)
dath	exp(8.5)
dath	exp(9)
dath	exp(9.5)
dath	exp(10)
dath	exp(10.5)
dath	exp(11)
dath	exp(11.5)


// ================================
// half r0=log (half r1), disturbed r1~r4
//		natural logarithm: log (fx*2^ex)=log (fx) + ex*ln (2)
//		special cases as coded below:
//			exponent field 0, either sign	=> half_inf_n
//			exponent field 0x1f, sign set	=> half_nan
//			exponent field 0x1f, sign clear	=> r1 passed through hmvsg r0,r0
//			any other input with sign set	=> half_nan
//		two words are pushed and popped again inside the routine
// ================================
math_log:
// r1=fx*2^ex ==> r4=ex; r1=fx
// r4=5-bit exponent field of r1 (bits 14..10)
mov		r4,r1
lsfti	r4,-10
andi	r4,0x1f
// this test runs before the sign test, so the sign is not looked at here
beq		math_log_inf_n
cmpi	r4,0x1f
beq		math_log_inf_nan
// remove the bias of 15, r4 is a signed integer from here on
subi	r4,15
// overwrite the exponent field of r1 with 0x0f; sign and fraction bits are kept
ldwi	r0,~(0x1f<<10)
and		r1,r0
ldwi	r0,(0x0f<<10)
or		r1,r0
// if (r1<0) return ( r0=NaN )
mov		r1,r1
bmi		math_log_nan

// r0=log (r1)=sigma (((r1 - 1)/(r1 + 1))^(2n + 1)/(2n + 1));
// the factor 2 of the series is applied after the summation
// r1=z=(fx - 1)/(fx + 1), r2=z^2
hldi	r2,1
mov		r3,r2
hadd	r2,r1
hsub	r1,r3
hdiv	r1,r2
mov		r2,r1
hmul	r2,r2

// fx
// n=0
pushw	r1
// n=1
hmul	r1,r2
hldi	r3,1./(2*1+1)
hmul	r3,r1
pushw	r3
// n=2
hmul	r1,r2
hldi	r3,1./(2*2+1)
hmul	r3,r1
// the push below is disabled in the original; the n=2 term stays in r3
#pushw	r3

// sigma
mov		r0,r3								// n=2
popw	r3
hadd	r0,r3								// n=1
popw	r3
hadd	r0,r3								// n=0

// ex*log (2)
// first r0=2*sigma, which completes log (fx)
hldi	r2,2
hmul	r0,r2
// r4 is converted to half by magnitude: a negative r4 is negated as an
// integer, converted, and negated again as a half
mov		r4,r4
bpl		pcnt+8
neg		r4
hhalf	r4,r4
hneg	r4,r4
bra		pcnt+2
hhalf	r4,r4
hldi	r2,ln(2)
hmul	r4,r2
hadd	r0,r4

rtnw

// exponent field 0x1f: sign set gives half_nan, otherwise r1 is returned
// after hmvsg r0,r0
math_log_inf_nan:
mov		r0,r1
bmi		math_log_nan
// NOTE(review): the effect of hmvsg with identical operands is not visible
// here; the original remark on the instruction is kept - confirm with the hfpu spec
hmvsg	r0,r0							// NaN => -NaN
rtnw

// exponent field 0
math_log_inf_n:
ldwi	r0,half_inf_n
rtnw

// negative input
math_log_nan:
ldwi	r0,half_nan
rtnw


// ================================
// half r0=sin (half r1), disturbed r1~r4
//		in:		r1 = x (half)
//		out:	r0 = sin (x) (half)
//		r4 bit0 = 1 when the result has to be negated
//		method: fold x into 0~PI/2, then a Taylor sum up to n=3
//		three words are pushed and popped again inside the routine
// ================================
math_sin:
// if (r1<0) { r1=-r1; r4=r4^1; }	sin (-x)=-sin (x)
ldbiu	r4,0							// 1=negate / 0=not
mov		r1,r1
bpl		pcnt+4
eori	r4,1
hneg	r1,r1

// r1=r1%(2*PI)
// r2=int (r1/(2*PI))*(2*PI) is subtracted; a negative remainder is
// corrected by adding 2*PI once
mov		r2,r1
hldi	r0,2*3.1415926535
hdiv	r2,r0
huint	r2,r2
hhalf	r2,r2
hmul	r2,r0
hsub	r1,r2
mov		r1,r1
bpl		pcnt+2
hadd	r1,r0

// r1 range: PI~2PI => 0~PI (inverse)
// r2=r1 - PI; when that is not negative, use it and flip the negate flag
hldi	r0,3.1415926535
mov		r2,r1
hsub	r2,r0
mov		r2,r2
bmi		pcnt+4
eori	r4,1
mov		r1,r2
// r1 range: PI/2~PI => PI/2~0
// r0 still holds PI, so r1=PI - r1
hldi	r2,3.1415926535/2
hcmp	r2,r1
bpl		pcnt+4
hsub	r0,r1
mov		r1,r0

// r0=sin (r1)=sigma ((-1)^n*r1^(2n+1)/(2n+1)!)
// r2=r1^2, r1=running odd power, r3=current term
// r0 is not preset here: it is first written by the mov in the sigma part
mov		r2,r1
hmul	r2,r2
// n=0
pushw	r1
// n=1
hmul	r1,r2
hldi	r3,-0.166666667
hmul	r3,r1
pushw	r3
// n=2
hmul	r1,r2
hldi	r3,0.008333333
hmul	r3,r1
pushw	r3
// n=3
hmul	r1,r2
hldi	r3,-0.000198413
hmul	r3,r1
// the push below is disabled in the original; the n=3 term stays in r3
#pushw	r3

// sigma
mov		r0,r3							// n=3
popw	r3
hadd	r0,r3							// n=2
popw	r3
hadd	r0,r3							// n=1
popw	r3
hadd	r0,r3							// n=0

// negate
mov		r4,r4
beq		pcnt+2
hneg	r0,r0

rtnw


// ================================
// half r0=abs (half r1), disturbed r1
//		r1 is overwritten with the result as well
// ================================
math_abs:
// r0=0 serves as the sign source for hmvsg
ldbiu	r0,0
// presumably hmvsg gives r1 the sign of r0, i.e. clears it - confirm with the hfpu spec
hmvsg	r1,r0
mov		r0,r1
rtnw


// ================================
// math_ceil
//		in:		r1 = x (half)
//		out:	r0 = ceil (x) (half)
//		disturbed r2 (r1 is also written, by hmvsg r1,r1)
// ================================
math_ceil:
// special values: after hmvsg r1,r1 the pattern in r1 is compared with
// half_inf, half_inf_n and half_nan; on a match r1 is returned as it is
hmvsg	r1,r1
ldwi	r2,half_inf
cmp		r1,r2
beq		math_ceil_inf_nan
// for the next two patterns only ldbih is used on r2
ldbih	r2,half_inf_n>>8
cmp		r1,r2
beq		math_ceil_inf_nan
ldbih	r2,half_nan>>8
cmp		r1,r2
beq		math_ceil_inf_nan

// ordinary values
// r2=integer from huint, r0=that integer as half with the sign of x
huint	r2,r1
hhalf	r0,r2
hmvsg	r0,r1
// r0 below x: one more on the integer, otherwise keep it
hcmp	r0,r1
bpl		math_ceil_keep
addi	r2,1
math_ceil_keep:
// convert again (r2 may have changed) and restore the sign of x
hhalf	r0,r2
hmvsg	r0,r1
rtnw

// inf / nan exit
math_ceil_inf_nan:
mov		r0,r1
rtnw


// ================================
// math_floor
//		in:		r1 = x (half)
//		out:	r0 = floor (x) (half)
//		disturbed r1,r2
// ================================
math_floor:
// special values: after hmvsg r1,r1 the pattern in r1 is compared with
// half_inf, half_inf_n and half_nan; on a match r1 is returned as it is
hmvsg	r1,r1
ldwi	r2,half_inf
cmp		r1,r2
beq		math_floor_inf_nan
// for the next two patterns only ldbih is used on r2
ldbih	r2,half_inf_n>>8
cmp		r1,r2
beq		math_floor_inf_nan
ldbih	r2,half_nan>>8
cmp		r1,r2
beq		math_floor_inf_nan

// ordinary values
// r2=integer from huint, r0=that integer as half with the sign of x
huint	r2,r1
hhalf	r0,r2
hmvsg	r0,r1
// x not below r0: r0 is already the result
hcmp	r1,r0
bpl		math_floor_done
// x below r0: one more on the integer, convert, restore the sign of x
addi	r2,1
hhalf	r0,r2
hmvsg	r0,r1
math_floor_done:
rtnw

// inf / nan exit
math_floor_inf_nan:
mov		r0,r1
rtnw


// ================================
// math_round
//		in:		r1 = x (half)
//		out:	r0 = round (x) (half), computed as floor (x + 0.5)
//		disturbed r1,r2
// ================================
math_round:
// a zero result from hfrac means there is nothing to round: return x
hfrac	r2,r1
mov		r2,r2
bne		math_round_frac
mov		r0,r1
rtnw

math_round_frac:
// r1=x + 0.5, then the same sequence as in math_floor
hldi	r0,0.5
hadd	r1,r0
// r2=integer from huint, r0=that integer as half with the sign of r1
huint	r2,r1
hhalf	r0,r2
hmvsg	r0,r1
// r1 not below r0: r0 is already the result
hcmp	r1,r0
bpl		math_round_done
// r1 below r0: one more on the integer, convert, negate
addi	r2,1
hhalf	r0,r2
hneg	r0,r0
math_round_done:
rtnw