Skip to content
Newer
Older
100644 104 lines (76 sloc) 3.16 KB
e016b05 Previous proposed changes won't do.
Zack Moratto authored
1 #include <cuda.h>
2 #include <iostream>
3
4 /* --------------------------- target code ------------------------------*/
5 #define THREAD_COUNT 128
6
/* Argument bundle passed by value to the flip_main kernel. */
struct params {
  float *input;   /* buffer the kernel reads from */
  float *output;  /* buffer the kernel writes to */
  int n;          /* element count that bounds the kernel's loop */
};
12
/* Element-wise inversion: output[i] = 1.0f - input[i] for every i in [0, p.n).
 *
 * Launch layout: any 1-D grid of 1-D blocks.  Each thread starts at its
 * global index and advances by the total number of launched threads, so the
 * result no longer depends on the launch using exactly one block of
 * THREAD_COUNT threads.
 *
 * p.input and p.output are dereferenced on the device, so they must be
 * device-accessible pointers to at least p.n floats.  No shared memory is
 * used.  A non-positive p.n makes the kernel a no-op.
 */
__global__ void flip_main (struct params p)
{
  if (p.n <= 0) {
    return;
  }

  /* Index arithmetic is done in size_t so that neither the global thread
     index nor the running index can overflow an int for large n. */
  const size_t count  = static_cast<size_t>(p.n);
  const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;

  for (size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
       i < count;
       i += stride) {
    p.output[i] = 1.0f - p.input[i];
  }
}
20
21 /* --------------------------- host code ------------------------------*/
22
// Logs a failed CUDA runtime call to stderr.  Returns true when `status` is
// cudaSuccess, false otherwise.
static bool invert_image_cuda_ok (cudaError_t status, const char* what) {
  if (status == cudaSuccess) {
    return true;
  }
  std::cerr << "invert_image: " << what << " failed: "
            << cudaGetErrorString(status) << "\n";
  return false;
}

// Invert a 1 channel, 32-bit floating point image.
//
// img    : host pointer to width * height floats; each value v is replaced
//          in place by 1.0f - v.
// width  : image width in pixels.
// height : image height in pixels.
//
// Nothing is done when img is null or either dimension is non-positive.  If a
// CUDA call fails the error is written to stderr, the remaining steps are
// skipped, and the device buffers are released before returning.
void invert_image (float* img, int width, int height) {
  if (img == 0 || width <= 0 || height <= 0) {
    return;
  }

  // params::n is an int, so the pixel count has to fit in one.
  const long long total   = static_cast<long long>(width) * height;
  const long long int_max = static_cast<long long>(~0u >> 1);
  if (total > int_max) {
    std::cerr << "invert_image: image of " << width << " x " << height
              << " pixels is too large.\n";
    return;
  }

  const int    N     = static_cast<int>(total);
  const size_t bytes = static_cast<size_t>(N) * sizeof(float);

  float* in  = 0;
  float* out = 0;

  std::cout << "Allocating GPU Memory.\n";
  bool ok = invert_image_cuda_ok (cudaMalloc ((void **)&in, bytes),
                                  "cudaMalloc (input)")
         && invert_image_cuda_ok (cudaMalloc ((void **)&out, bytes),
                                  "cudaMalloc (output)");

  if (ok) {
    std::cout << "Copying data.\n";
    ok = invert_image_cuda_ok (
        cudaMemcpy (in, img, bytes, cudaMemcpyHostToDevice),
        "host-to-device copy");
  }

  if (ok) {
    struct params funcParams;
    funcParams.output = out;
    funcParams.input  = in;
    funcParams.n      = N;

    std::cout << "Running kernel.\n";
    flip_main<<<1,THREAD_COUNT>>>(funcParams);
    // A launch does not return a status; a bad configuration is only
    // visible through cudaGetLastError().
    ok = invert_image_cuda_ok (cudaGetLastError (), "kernel launch");
  }

  if (ok) {
    std::cout << "Copying result.\n";
    // This blocking copy also waits for the kernel to finish.
    ok = invert_image_cuda_ok (
        cudaMemcpy (img, out, bytes, cudaMemcpyDeviceToHost),
        "device-to-host copy");
  }

  // Release the device buffers on every path; cudaFree(0) performs no
  // operation, so buffers that were never allocated are safe to pass.
  cudaFree (out);
  cudaFree (in);
}
48
49
50 //------------
51
/// Image resource backed by a float buffer allocated with cudaMalloc.
///
/// The buffer is sized as cols * rows * planes floats, taken from the
/// ImageFormat supplied at construction, and is released in the destructor.
class CudaImageResource {
  float* m_buffer;       // cudaMalloc'ed storage; 0 if empty or allocation failed
  ImageFormat m_format;  // dimensions plus pixel format / channel type

  // The destructor frees m_buffer, so a member-wise copy would free the same
  // pointer twice.  Copying is disabled (declared, intentionally undefined).
  CudaImageResource(CudaImageResource const&);
  CudaImageResource& operator=(CudaImageResource const&);

  /// Number of floats the buffer holds; 0 when any dimension is non-positive.
  size_t element_count() const {
    if (m_format.cols <= 0 || m_format.rows <= 0 || m_format.planes <= 0)
      return 0;
    // Widen before multiplying so large images cannot overflow a 32-bit int.
    return static_cast<size_t>(m_format.cols)
         * static_cast<size_t>(m_format.rows)
         * static_cast<size_t>(m_format.planes);
  }

  /// Logs a failed CUDA runtime call to stderr; returns true on cudaSuccess.
  static bool cuda_ok(cudaError_t status, const char* what) {
    if (status == cudaSuccess)
      return true;
    std::cerr << "CudaImageResource: " << what << " failed: "
              << cudaGetErrorString(status) << "\n";
    return false;
  }

public:

  /// Allocates a buffer large enough for an image of the given format.
  /// If the allocation fails the error is logged and the buffer stays null,
  /// in which case read() and write() do nothing.
  CudaImageResource(ImageFormat format):
    m_buffer(0), m_format(format) {
    const size_t count = element_count();
    if (count != 0 &&
        !cuda_ok(cudaMalloc((void **)&m_buffer, count * sizeof(float)),
                 "cudaMalloc"))
      m_buffer = 0;
  }

  /// Releases the buffer.
  virtual ~CudaImageResource() {
    cudaFree(m_buffer);  // performs no operation when m_buffer is 0
  }

  /// Returns the number of columns in an image resource.
  virtual int32 cols() const { return m_format.cols; }

  /// Returns the number of rows in an image resource.
  virtual int32 rows() const { return m_format.rows; }

  /// Returns the number of planes in an image resource.
  virtual int32 planes() const { return m_format.planes; }

  /// Returns the number of channels in a image resource.
  int32 channels() const { return num_channels( pixel_format() ); }

  /// Returns the native pixel format of the resource.
  virtual PixelFormatEnum pixel_format() const { return m_format.pixel_format; }

  /// Returns the native channel type of the resource.
  virtual ChannelTypeEnum channel_type() const { return m_format.channel_type; }

  /// Read the image resource at the given location into the given buffer.
  ///
  /// NOTE(review): copies the whole buffer into buf.data.  This assumes
  /// buf.data is a host pointer to at least cols * rows * planes packed
  /// floats and that bbox spans the full image; bbox and any strides in
  /// ImageBuffer are not honoured yet -- TODO confirm against ImageBuffer.
  virtual void read( ImageBuffer const& buf, BBox2i const& bbox ) const {
    (void)bbox;
    if (m_buffer == 0 || buf.data == 0)
      return;
    cuda_ok(cudaMemcpy(buf.data, m_buffer, element_count() * sizeof(float),
                       cudaMemcpyDeviceToHost),
            "device-to-host copy");
  }

  /// Write the given buffer to the image resource at the given location.
  ///
  /// NOTE(review): copies cols * rows * planes floats from buf.data over the
  /// whole buffer, under the same layout assumptions as read() -- TODO
  /// confirm, and honour bbox once partial writes are needed.
  virtual void write( ImageBuffer const& buf, BBox2i const& bbox ) {
    (void)bbox;
    if (m_buffer == 0 || buf.data == 0)
      return;
    cuda_ok(cudaMemcpy(m_buffer, buf.data, element_count() * sizeof(float),
                       cudaMemcpyHostToDevice),
            "host-to-device copy");
  }

  /// Returns the optimal block size/alignment for partial reads or writes.
  virtual Vector2i native_block_size() const { return Vector2i(cols(),rows()); }

  /// Force any changes to be written to the resource.
  virtual void flush() {}

};
Something went wrong with that request. Please try again.