forked from AerialX/PSL1GHT
/
main.c
128 lines (105 loc) · 3.81 KB
/
main.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/*
* Sample program to illustrate a parallel algorithm manipulating a shared
* array.
*
* 6 threads are created
*
*
* The PPU performs the following tasks:
* - initialize a 4x6 array
* - create 6 threads. Each thread is assigned a different rank from 0 to 5.
* The threads also get the address of the array. They put themselves in
* a blocking waiting mode for a signal notification.
* - The PPU fills the array with the consecutive elements { 1, 2, .., 24 }.
* - The PPU sends a signal to the threads to unblock them.
* - Each thread reads a different vector of 4 integers from the array (using
* dma), multiplies all elements by 2 and sends the vector back to main
* storage.
*
* The original array contains the integers : { 1, 2, ... 24 } so the result
* is the values { 2, 4, ..., 48 }.
*
* All 24 multiplications are done in parallel:
* - Each SPU performs a vector multiplication of the { 2, 2, 2, 2 } vector with
* the vector it read, hence 4 multiplications per SPU.
* - All 6 SPUs do the same job in parallel, hence 4*6 = 24 multiplications in total.
*/
#include <psl1ght/lv2.h>
#include <psl1ght/lv2/spu.h>
#include <lv2/spu.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include "spu.bin.h"
#include "spustr.h"
/*
 * Convert a pointer to the u64 type by casting it through void *.
 * In this file the result is used in the initializers of the thread and
 * thread-group attribute structs, as the per-thread argument, and as the
 * array address stored in each spustr_t.
 * NOTE(review): "ea" presumably stands for "effective address" -- confirm.
 * NOTE(review): when pointers are narrower than u64, how this cast widens the
 * value is implementation-defined -- confirm against the toolchain in use.
 */
#define ptr2ea(x) ((u64)(void *)(x))
int main(int argc, const char* argv[])
{
sysSpuImage image;
u32 entry = 0;
u32 segmentcount = 0;
sysSpuSegment* segments;
u32 group_id;
Lv2SpuThreadAttributes attr = { ptr2ea("mythread"), 8+1, LV2_SPU_THREAD_ATTRIBUTE_NONE };
Lv2SpuThreadGroupAttributes grpattr = { 7+1, ptr2ea("mygroup"), 0, 0 };
Lv2SpuThreadArguments arg[6];
u32 cause, status;
int i;
spustr_t *spu = memalign(16, 6*sizeof(spustr_t));
uint32_t *array = memalign(16, 24*sizeof(uint32_t));
printf("Initializing 6 SPUs... ");
printf("%08x\n", lv2SpuInitialize(6, 0));
printf("Getting ELF information... ");
printf("%08x\n", sysSpuElfGetInformation(spu_bin, &entry, &segmentcount));
printf("\tEntry Point: %08x\n\tSegment Count: %08x\n", entry, segmentcount);
size_t segmentsize = sizeof(sysSpuSegment) * segmentcount;
segments = (sysSpuSegment*)malloc(segmentsize);
memset(segments, 0, segmentsize);
printf("Getting ELF segments... ");
printf("%08x\n", sysSpuElfGetSegments(spu_bin, segments, segmentcount));
printf("Loading ELF image... ");
printf("%08x\n", sysSpuImageImport(&image, spu_bin, 0));
printf("Creating thread group... ");
printf("%08x\n", lv2SpuThreadGroupCreate(&group_id, 6, 100, &grpattr));
printf("group id = %d\n", group_id);
/* create 6 spu threads */
for (i = 0; i < 6; i++) {
spu[i].rank = i;
spu[i].count = 6;
spu[i].sync = 0;
spu[i].array_ea = ptr2ea(array);
arg[i].argument1 = ptr2ea(&spu[i]);
printf("Creating SPU thread... ");
printf("%08x\n", lv2SpuThreadInitialize(&spu[i].id, group_id, i, &image, &attr, &arg[i]));
printf("thread id = %d\n", spu[i].id);
printf("Configuring SPU... %08x\n",
lv2SpuThreadSetConfiguration(spu[i].id, LV2_SPU_SIGNAL1_OVERWRITE|LV2_SPU_SIGNAL2_OVERWRITE));
}
printf("Starting SPU thread group... ");
printf("%08x\n", lv2SpuThreadGroupStart(group_id));
printf("Initial array: ");
for (i = 0; i < 24; i++) {
array[i] = i+1;
printf(" %d", array[i]);
}
printf("\n");
/* Send signal notification to waiting spus */
for (i = 0; i < 6; i++)
printf("Sending signal... %08x\n",
lv2SpuThreadWriteSignal(spu[i].id, 0, 1));
printf("Waiting for SPUs to return...\n");
for (i = 0; i < 6; i++)
while (spu[i].sync == 0);
printf("Output array: ");
for (i = 0; i < 24; i++)
printf(" %d", array[i]);
printf("\n");
printf("Joining SPU thread group... ");
printf("%08x\n", lv2SpuThreadGroupJoin(group_id, &cause, &status));
printf("cause=%d status=%d\n", cause, status);
printf("Closing image... %08x\n", sysSpuImageClose(&image));
free(array);
free(spu);
return 0;
}