<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -31,18 +31,16 @@ int main(int argc, char *argv[])
     else
         cudaSetDevice( cutGetMaxGflopsDeviceId() );
 
-    int result = MLMain(argc, argv);
-    cutilExit(argc, argv);
-    return result;
+    return MLMain(argc, argv);
 }
 
 void cuFourier1D (double *h_A, long n)
 {
-    float norm = 1.0/sqrt((float) n);
-    int mem_size = sizeof(Complex) * n;
+    double norm = 1.0/sqrt((double) n);
+    long mem_size = sizeof(Complex) * n;
     
     // Allocate host memory for the signal
-    Complex* h_signal = (Complex*)malloc(sizeof(Complex) * n);
+    Complex* h_signal = (Complex*)malloc(mem_size);
     
     // Initalize the memory for the signal
     for (long i = 0; i &lt; n; ++i) {
@@ -54,35 +52,41 @@ void cuFourier1D (double *h_A, long n)
     Complex* d_signal;
     cutilSafeCall(cudaMalloc((void**)&amp;d_signal, mem_size));
     // Copy host memory to device
-    cutilSafeCall(cudaMemcpy(d_signal, h_signal, mem_size,
-                              cudaMemcpyHostToDevice));
+    cutilSafeCall(cudaMemcpy(d_signal, h_signal, mem_size, cudaMemcpyHostToDevice));
                               
     // CUFFT plan
     cufftHandle plan;
     cufftSafeCall(cufftPlan1d(&amp;plan, n, CUFFT_C2C, 1));
     
     // Transform signal
-    cufftSafeCall(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD));
+    cufftSafeCall(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_INVERSE));
     
     // Copy device memory to host
     Complex* h_convolved_signal = h_signal;
-    cutilSafeCall(cudaMemcpy(h_convolved_signal, d_signal, mem_size,
-                              cudaMemcpyDeviceToHost));
+    cutilSafeCall(cudaMemcpy(h_convolved_signal, d_signal, mem_size, cudaMemcpyDeviceToHost));
+
+    // Release d_signal
+    cutilSafeCall(cudaFree(d_signal));
     
     // Destroy CUFFT context
     cufftSafeCall(cufftDestroy(plan));
     
     // Return transformed signal to Mathematica as a Complex List
-    MLPutFunction(stdlink,&quot;List&quot;,n);
-    for (long i = 0; i &lt; n; i++) {
-        MLPutFunction(stdlink,&quot;Complex&quot;,2);
-        MLPutFloat(stdlink,h_convolved_signal[i].x*norm);
-        MLPutFloat(stdlink,h_convolved_signal[i].y*norm);
-    }
+    MLPutFunction(stdlink, &quot;Map&quot;, 2);
+    MLPutFunction(stdlink, &quot;Function&quot;, 2);
+    MLPutFunction(stdlink, &quot;List&quot;, 1);
+    MLPutSymbol(stdlink, &quot;x&quot;);
+    MLPutFunction(stdlink, &quot;Apply&quot;, 2);
+    MLPutSymbol(stdlink, &quot;Complex&quot;);
+    MLPutSymbol(stdlink, &quot;x&quot;);
+    MLPutFunction(stdlink, &quot;Partition&quot;, 2);
+    MLPutFunction(stdlink, &quot;Times&quot;, 2);
+    MLPutReal(stdlink, norm);
+    MLPutReal32List(stdlink, (float*)h_convolved_signal, 2*n);
+    MLPutInteger(stdlink, 2);
     
     // Cleanup memory
     free(h_signal);
-    cutilSafeCall(cudaFree(d_signal));
     
     cudaThreadExit();
-}
\ No newline at end of file
+}</diff>
      <filename>src/cuFourier/cuFourier.cu</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>cc4833f782001f3277ca7121fce531b9897c328b</id>
    </parent>
  </parents>
  <author>
    <name>kashif</name>
    <email>kashif@nomad-labs.com</email>
  </author>
  <url>http://github.com/kashif/mathematica_cuda/commit/6b6bda290f010ba087a900466ab43e41e6984815</url>
  <id>6b6bda290f010ba087a900466ab43e41e6984815</id>
  <committed-date>2009-09-21T13:46:08-07:00</committed-date>
  <authored-date>2009-09-21T13:46:08-07:00</authored-date>
  <message>Sending Complex numbers as an array.

Suggestion from Patrick Scheibe.</message>
  <tree>d2b29b636309bb6a7382fa4a6f577bb69d40a4a7</tree>
  <committer>
    <name>kashif</name>
    <email>kashif@nomad-labs.com</email>
  </committer>
</commit>
