-
Notifications
You must be signed in to change notification settings - Fork 0
/
mixer_asm.s
261 lines (193 loc) · 7.68 KB
/
mixer_asm.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
; Target CPU: the routine below uses move16, so the 68040 instruction set is selected.
mc68040
; Constants and structure offsets used below (CACHE_LINE_SIZE, AUD_NUM_CHANNELS, am_*, ac_*)
; are not defined in this file - presumably they come from this include; confirm.
include "mixer_asm.i"
section .text,code
align 4
; Symbols exported from this file.
xdef _asm_sizeof_mixer;
xdef _Aud_MixLine
; Table of word-sized normalisation factors, defined elsewhere.
xref _Aud_NormFactors_vw;
;-----------------------------------------------------------------------------------------------------------------
; Aud_MixLine / _Aud_MixLine
;
; Mixes one cache line (CACHE_LINE_SIZE 8-bit samples) from every active channel into the left and right
; 16-bit accumulation buffers, then analyses and normalises the result. The stages are:
;
;   1. Clear    - zero both accumulation buffers.
;   2. Mix      - for each channel with a sample pointer and samples remaining, fetch one line of samples
;                 and add the volume-table-scaled values into the left and right accumulators. The channel
;                 state is then advanced: the sample pointer is incremented, the samples-left counter is
;                 decremented, and the channel is reset once its last line has been consumed.
;   3. Analyse  - find the largest absolute value in each accumulator (needed for the dynamics processing)
;                 and derive a normalisation index from it.
;   4. Normalise- scale each accumulator by the factor selected by its index, pack the results to 8 bit and
;                 append them, together with the matching volume value, to the output packets.
;
; In:      a0 = mixer structure (not modified)
; Saved:   d2-d5/a2-a4 (restored on exit)
; Trashed: d0/d1/a1
;
; Assumes each channel's sample pointer and the fetch buffer are 16-byte aligned, as required by move16,
; and that the samples-left counter is a multiple of CACHE_LINE_SIZE - TODO confirm against the callers.
;
; TODO reduce the register usage count and reorder non-dependent instructions for better 060 performance
;-----------------------------------------------------------------------------------------------------------------
_Aud_MixLine::
Aud_MixLine:
        movem.l d2-d5/a2-a4,-(sp)

;
; Stage 1: Clear. CACHE_LINE_SIZE longwords cover both word-sized accumulators, because the right
; accumulator immediately follows the left one.
;
        lea     am_AccumL_vw(a0),a1
        move.w  #CACHE_LINE_SIZE-1,d2
.clear_loop:
        clr.l   (a1)+
        dbra    d2,.clear_loop

;
; Stage 2: Mix. Iterate the fixed-size channel state array. For each channel that has an active pointer
; and samples remaining, transfer a line of samples to the fetch buffer. This is done with move16 so that
; the sample data does not churn through the data cache.
;
        lea     am_ChannelState(a0),a1          ; a1 = current channel state
        moveq   #AUD_NUM_CHANNELS-1,d2          ; d2 = channel loop counter

.next_channel:
        ; Sample pointer goes via a data register because moves to address registers do not set the CC.
        move.l  ac_SamplePtr_l(a1),d0
        beq.s   .done_channel                   ; null pointer: channel is idle
        move.l  d0,a2

        ; A live pointer with nothing left to play should never happen; skip defensively.
        tst.w   ac_SamplesLeft_w(a1)
        beq.s   .done_channel

        ; Fetch the left/right volume byte pair in one go and clamp each to 0-15 (0 = silent side).
        move.w  ac_LeftVol_b(a1),d5
        and.w   #$0F0F,d5
        beq.s   .update_channel                 ; both sides silent: just consume the line

        ; Big-endian word load put the left volume in the high byte; swap so left is processed first.
        rol.w   #8,d5

        ; Grab the next line of samples without polluting the data cache.
        lea     am_FetchBuffer_vb(a0),a3
        move16  (a2)+,(a3)+

        ; Two passes: pass one accumulates into the left buffer, pass two into the right buffer.
        moveq   #1,d3
        lea     am_AccumL_vw(a0),a4             ; a4 walks left then right accumulator
        clr.l   d0

.mix_samples:
        move.b  d5,d0                           ; d0 = volume for this side, 0-15
        bne.s   .mix_side

        ; This side is silent. Step over its accumulator instead of abandoning the channel, otherwise a
        ; channel with left volume 0 would never contribute to the right buffer.
        adda.w  #CACHE_LINE_SIZE*2,a4
        bra.s   .next_side

.mix_side:
        ; Volume 1-15 selects one of 15 tables of 256 words: offset = (vol - 1) * 256 * sizeof(WORD).
        subq.w  #1,d0
        lsl.w   #8,d0
        add.w   d0,d0
        add.w   am_TableOffset_w(a0),d0         ; tables live inside the mixer structure
        lea     (a0,d0.w),a2                    ; a2 = volume table (sample pointer is no longer needed)

        lea     am_FetchBuffer_vb(a0),a3        ; a3 = fetched samples
        moveq   #CACHE_LINE_SIZE-1,d1           ; d1 = sample loop counter
        clr.w   d0                              ; keep the high byte clear: sample is an unsigned index

.next_sample:
        move.b  (a3)+,d0                        ; next 8-bit sample
        move.w  (a2,d0.w*2),d4                  ; volume-adjusted 16-bit value
        add.w   d4,(a4)+                        ; accumulate onto the target buffer
        dbra    d1,.next_sample

.next_side:
        lsr.w   #8,d5                           ; bring the other side's volume into the low byte
        dbra    d3,.mix_samples

.update_channel:
        sub.w   #CACHE_LINE_SIZE,ac_SamplesLeft_w(a1)
        bne.s   .inc_sample_ptr

        ; Channel exhausted: clear the pointer and both volume bytes so it is skipped from now on.
        clr.l   ac_SamplePtr_l(a1)
        clr.w   ac_LeftVol_b(a1)
        bra.s   .done_channel

.inc_sample_ptr:
        add.l   #CACHE_LINE_SIZE,ac_SamplePtr_l(a1)

.done_channel:
        lea     Aud_ChanelState_SizeOf_l(a1),a1
        dbra    d2,.next_channel

;
; Stage 3: Peak level analysis. Find the maximum absolute value of each accumulation buffer so that each
; one can be normalised and converted to 8-bit data with a corresponding channel volume attenuation.
;
        lea     am_AccumL_vw(a0),a4             ; a4 walks left then right accumulator
        lea     am_AbsMaxL_w(a0),a2             ; a2 walks the peak slots
        moveq   #1,d3                           ; two passes again: left, then right
        moveq   #9,d4                           ; shift that turns a 15-bit peak into a 6-bit index

.next_buffer:
        clr.l   d2                              ; d2 = running peak
        moveq   #CACHE_LINE_SIZE-1,d1

.next_buffer_value:
        move.w  (a4)+,d0
        bge.s   .have_abs
        neg.w   d0
        bpl.s   .have_abs
        move.w  #$7FFF,d0                       ; -32768 has no positive counterpart: clamp to 15 bits
.have_abs:
        cmp.w   d0,d2
        bgt.s   .not_bigger
        move.w  d0,d2
.not_bigger:
        dbra    d1,.next_buffer_value

        ; Peak value (15 bit). Not strictly needed later, but handy for checking.
        move.w  d2,(a2)+

        ; Normalisation index = peak >> 9. It is stored two words on from this side's peak slot, which
        ; assumes the layout AbsMaxL, AbsMaxR, IndexL, IndexR - TODO confirm against mixer_asm.i.
        lsr.w   d4,d2
        move.w  d2,2(a2)
        dbra    d3,.next_buffer

;
; Stage 4: Normalisation. Scale every 16-bit value in each accumulation buffer by the factor chosen for
; that buffer and keep bits 16-23 of the product as the 8-bit output sample.
;
; Every index is handled by the multiply path. A shift-based fast path for indices where (i + 1) is a
; power of two was planned but never implemented; branching to it wrote no samples and left the
; accumulator pointer behind, so it is not used.
; TODO add the shift fast path once the factor-to-shift mapping is defined.
;
        moveq   #1,d3                           ; left, then right
        lea     am_AccumL_vw(a0),a2             ; a2 walks left then right accumulator
        lea     am_IndexL_w(a0),a3              ; a3 walks the two indices
        lea     am_LPacketSamplePtr_l(a0),a4    ; a4 -> { sample packet ptr, volume packet ptr } pair

.normalize_next:
        move.w  (a3)+,d1                        ; d1 = index i calculated in the analysis stage
        lea     _Aud_NormFactors_vw,a1
        move.w  (a1,d1.w*2),d2                  ; d2 = normalisation factor (word table, so scale by 2)

        ; Emit the volume value (i + 1) for this packet and advance the volume packet pointer.
        move.l  4(a4),a1
        moveq   #1,d0
        add.w   d1,d0
        move.w  d0,(a1)+
        move.l  a1,4(a4)

        move.l  (a4),a1                         ; a1 = sample packet write pointer
        moveq   #CACHE_LINE_SIZE/4-1,d4         ; four samples are packed per longword written

.mul_norm_four:
        ; Build one longword of four 8-bit samples in d1. We assume short muls on 060.
        move.w  (a2)+,d0
        muls.w  d2,d0
        swap    d0
        move.b  d0,d1
        lsl.l   #8,d1

        move.w  (a2)+,d0
        muls.w  d2,d0
        swap    d0
        move.b  d0,d1
        lsl.l   #8,d1

        move.w  (a2)+,d0
        muls.w  d2,d0
        swap    d0
        move.b  d0,d1
        lsl.l   #8,d1

        move.w  (a2)+,d0
        muls.w  d2,d0
        swap    d0
        move.b  d0,d1

        move.l  d1,(a1)+                        ; long slow chip write here
        dbra    d4,.mul_norm_four

        move.l  a1,(a4)                         ; store the advanced sample packet pointer
        lea     8(a4),a4                        ; next pointer pair
        dbra    d3,.normalize_next

.finished:
        movem.l (sp)+,d2-d5/a2-a4
        rts
; mixer in a0
; Placeholder: returns immediately without reading or changing anything.
; NOTE(review): presumably reserved for a stand-alone multiply-based normaliser - confirm before use.
Aud_Normalise_mul:
rts
; Placeholder: returns immediately without reading or changing anything.
; NOTE(review): presumably reserved for a stand-alone shift-based normaliser - confirm before use.
Aud_Normalise_shift:
rts
; Exported word constant holding Aud_Mixer_SizeOf_l, a symbol not defined in this file.
; NOTE(review): presumably lets the C side check that its mixer structure size matches - confirm.
_asm_sizeof_mixer::
dc.w Aud_Mixer_SizeOf_l