In [53]:
%%writefile main.cu
#include <iostream>

__device__ float dot_product(float* row, float* col, int size) {

  float result = 0.;
  for (int i = 0; i < size; i++) {
    result += (row[i] * col[i]);
  }

  return result;
}

class Vector3 {

  public:
    __device__ Vector3();
    Vector3(float, float, float);
    float x;
    float y;
    float z;
    __device__ void normalize();
};

__device__ void Vector3::normalize() {

    float magnitude = (x * x) + (y * y) + (z * z);
    x = x / magnitude;
    y = y / magnitude;
    z = z / magnitude;
}

__device__ Vector3::Vector3(): x(0.), y(0.), z(0.) {
}

Vector3::Vector3(float x, float y, float z): x(x), y(y), z(z) {
}

class Screw {

  public:
    Screw();
    Screw(Vector3, Vector3);
    //Screw(float*);

    Vector3 w;
    Vector3 v;
};

Screw::Screw(): w(Vector3(0., 0., 0.)), v(Vector3(0., 0., 0.)) {
}

Screw::Screw(Vector3 w, Vector3 v): w(w), v(v) {
}

class Transformation {

  public:
    static const int num_entries_in_matrix = 16;
    float values[num_entries_in_matrix];

    __device__ Transformation();
    __device__ ~Transformation();
    __device__ Transformation(float*);
    __device__ Transformation operator+(const Transformation&);
    __device__ Transformation operator*(const Transformation&);
    __device__ Transformation operator*(const float);
    __device__ Transformation bracket_w(const Vector3& w);
    __device__ Transformation bracket_w_squared(const Vector3& w);
    __device__ Vector3 operator*(const Vector3&);
    __device__ void set_translation(const Vector3&);
    __device__ float* get_row(int) const;
    __device__ float* get_column(int) const;
    void print() const;
};

__device__ Transformation::Transformation() {

  // Create the identity matrix.
  for (int i = 0; i < num_entries_in_matrix; i++) {
    values[i] = 0.0;
  }

  values[0] = 1.;
  values[5] = 1.;
  values[10] = 1.;
  values[15] = 1.;
}

__device__ Transformation::~Transformation() {
  //free(values);
}

__device__ Transformation::Transformation(float* input_values) {

  for (int i = 0; i < num_entries_in_matrix; i++) {
    this->values[i] = input_values[i];
  }
}

__device__ Transformation Transformation::operator+(const Transformation &right) {

  Transformation result = Transformation();

  for (int i = 0; i < num_entries_in_matrix; i++) {
    result.values[i] = values[i] + right.values[i];
  }

  result.values[num_entries_in_matrix - 1] = 1.0;
  return result;
}

__device__ Transformation Transformation::operator*(const Transformation &right) {

  Transformation result = Transformation();
  int num_entries_in_row = 4;

  for (int row_index = 0; row_index < num_entries_in_row; row_index++) {
    float* row = get_row(row_index);

    for (int col_index = 0; col_index < num_entries_in_row; col_index++) {
      float* column = right.get_column(col_index);
      result.values[(row_index * 4) + col_index] = dot_product(row, column, num_entries_in_row);
      free(column);
    }
    free(row);
  }

  return result;
}

__device__ Transformation Transformation::operator*(const float c) {

  Transformation result = Transformation();

  for (int i = 0; i < num_entries_in_matrix - 1; i++) {
    result.values[i] = values[i] * c;
  }

  return result;
}

__device__ Transformation Transformation::bracket_w(const Vector3& w) {

  Transformation result = Transformation();

  // First row.
  result.values[0] = 0.;
  result.values[1] = -w.z;
  result.values[2] = w.y;

  // Second row.
  result.values[4] = w.z;
  result.values[5] = 0;
  result.values[6] = -w.x;

  // Third row.
  result.values[8] = -w.y;
  result.values[9] = w.x;
  result.values[10] = 0.;

  return result;
}

__device__ Transformation Transformation::bracket_w_squared(const Vector3& w) {

  Transformation result = Transformation();

  // First row.
  result.values[0] = -(w.z * w.z) - (w.y * w.y);
  result.values[1] = w.y * w.x;
  result.values[2] = w.z * w.x;

  // Second row.
  result.values[4] = w.y * w.x;
  result.values[5] = -(w.z * w.z) - (w.x * w.x);
  result.values[6] = w.z * w.y;

  // Third row.
  result.values[8] = w.z * w.x;
  result.values[9] = w.z * w.y;
  result.values[10] = -(w.y * w.y) - (w.x * w.x);

  return result;
}

__device__ Vector3 Transformation::operator*(const Vector3& right) {

  Vector3 result = Vector3();
  result.x = (values[0] * right.x) + (values[1] * right.y) + (values[2] * right.z);
  result.y = (values[4] * right.x) + (values[5] * right.y) + (values[6] * right.z);
  result.z = (values[8] * right.x) + (values[9] * right.y) + (values[10] * right.z);
  return result;
}

__device__ void Transformation::set_translation(const Vector3& v) {
  values[3] = v.x;
  values[7] = v.y;
  values[11] = v.z;
}

__device__ float* Transformation::get_row(int index) const {

  float* row = ((float*)values) + (index * 4);
  float* copy = (float*) malloc(sizeof(float) * 4);
  for (int i = 0; i < 4; i++) {
    copy[i] = row[i];
  }

  return copy;
}

__device__ float* Transformation::get_column(int index) const {

  float* column = (float*) malloc(sizeof(float) * 4);
  for (int i = 0; i < 4; i++) {
    column[i] = values[index + (4 * i)];
  }

  return column;
}

void Transformation::print() const {

  for (int i = 0; i < num_entries_in_matrix; i++) {

    if (((i + 1) % 4) == 0) {
      std::cout << values[i] << std::endl;
    }
    else {
      std::cout << values[i] << ", ";
    }
  }
}

__device__ Transformation exponential_rotation(Vector3& w, float theta) {
  /* See page 113 of Modern Robotics.
  * I + sin θ[ωˆ] + (1 − cos θ)[ωˆ]2
  */

  Transformation identity = Transformation();
  Transformation sin_theta_w = Transformation().bracket_w(w) * sin(theta);
  Transformation cos_theta_w_squared = Transformation().bracket_w_squared(w) * (1.0 - cos(theta));

  return identity + sin_theta_w + cos_theta_w_squared;
  //return sin_theta_w;
  //return cos_theta_w_squared;
  //return identity;
}

__device__ Vector3 rigid_body_exponential_translation(Vector3& w, Vector3& v, float theta) {
  /* See page 113 of Modern Robotics.
   * (Iθ + (1 − cos θ)[ω] + (θ − sin θ)[ω]2)v
   * (A + B + C) * (v)
   * (3 x 3 matrix) * (3 x 1 matrix) = (3 x 1 matrix)
   */

  Transformation identity_theta = Transformation() * theta;
  Transformation sin_theta_w = Transformation().bracket_w(w) * (1.0 - cos(theta));
  Transformation cos_theta_w_squared = Transformation().bracket_w_squared(w) * (theta - sin(theta));

  return (identity_theta + sin_theta_w + cos_theta_w_squared) * v;
}

__device__ Transformation rigid_body_exponential(Screw& screw, float theta) {

  // Create the transformations.
  Transformation result = exponential_rotation(screw.w, theta);
  Vector3 translatation = rigid_body_exponential_translation(screw.w, screw.v, theta);

  result.set_translation(translatation);
  return result;
}

__device__ Transformation* forward_kinematics(Screw* screws, float* thetas, Transformation& home_position, int num_joints) {

  Transformation* transformations = (Transformation*) malloc(sizeof(Transformation) * num_joints);
  for (int i = 0; i < num_joints; i++) {
    screws[i].w.normalize();
    transformations[i] = rigid_body_exponential(screws[i], thetas[i]);
  }

  // Multiply each transformation together.
  Transformation result = transformations[0];
  for (int i = 1; i < num_joints; i++) {
    result = result * transformations[i];
  }

  result = result * home_position;

  //free(transformations);
  return &result;
}

__global__ void forward_kinematics_kernel(Screw* screw_axes, float* thetas, float* home_position_values, int num_joints, int num_problems, Transformation* result_transformations) {

  int inputs_index = (blockIdx.x * blockDim.x) + threadIdx.x;
  int result_index = inputs_index;
  int offset = blockDim.x * gridDim.x;

  Transformation home_position = Transformation(home_position_values);

  while (inputs_index < num_problems) {
    // Screw* screws, float* thetas, int num_joints, float* home_position_values
    result_transformations[result_index] = *forward_kinematics(screw_axes, thetas + (inputs_index * num_joints), home_position, num_joints);

    inputs_index += offset;
    result_index += offset;
  }
}

__global__ void inverse_kinematics_kernel(Screw* screw_axes, float* thetas, float* home_position_values, int num_joints, Transformation* result_transformations) {

  int inputs_index = threadIdx.x;
  int result_index = threadIdx.x;

  Transformation home_position = Transformation(home_position_values);

}


void create_joint_angles(float a, float b, float c, float d, float e, float f, float* output) {
  output[0] = a;
  output[1] = b;
  output[2] = c;
  output[3] = d;
  output[4] = e;
  output[5] = f;
}

int main() {

  // Robot info.
  int num_joints = 6;
  int num_configurations = 10550;

  Transformation* result_transformation_host = (Transformation*) malloc(sizeof(Transformation) * num_configurations);
  Transformation* result_transformation_device;
  cudaMalloc((void**)&result_transformation_device, sizeof(Transformation) * num_configurations);

  Screw* screw_axes_host = (Screw*) malloc(sizeof(Screw) * num_joints);
  Screw* screw_axes_device;
  cudaMalloc((void**)&screw_axes_device, sizeof(Screw) * num_joints);

  float* M_host = (float*) malloc(sizeof(float) * Transformation::num_entries_in_matrix);
  float* M_device;
  cudaMalloc((void**)&M_device, sizeof(float) * Transformation::num_entries_in_matrix);

  float* thetas_host = (float*) malloc(sizeof(float) * num_joints * num_configurations);
  float* thetas_device;
  cudaMalloc((void**)&thetas_device, sizeof(float) * num_joints * num_configurations);

  // UR5 Screw axes.
  // See page 148 of Modern Robotics.
  float H1 = 0.089;
  float H2 = 0.095;
  float L1 = 0.425;
  float L2 = 0.392;
  float W1 = 0.109;
  float W2 = 0.082;

  // Define the screw axes.
  screw_axes_host[0] = Screw(Vector3(0., 0., 1.), Vector3(0., 0., 0.));
  screw_axes_host[1] = Screw(Vector3(0., 1., 0.), Vector3(-H1, 0., 0.));
  screw_axes_host[2] = Screw(Vector3(0., 1., 0.), Vector3(-H1, 0., L1));
  screw_axes_host[3] = Screw(Vector3(0., 1., 0.), Vector3(-H1, 0., L1 + L2));
  screw_axes_host[4] = Screw(Vector3(0., 0., -1.), Vector3(-W1, L1 + L2, 0.));
  screw_axes_host[5] = Screw(Vector3(0., 1., 0.), Vector3(H2 - H1, 0., L1 + L2));

  // Define the home pose.
  M_host[0] = -1.;
  M_host[1] = 0.;
  M_host[2] = 0.;
  M_host[3] = L1 + L2;

  M_host[4] = 0.;
  M_host[5] = 0.;
  M_host[6] = 1.;
  M_host[7] = W1 + W2;

  M_host[8] = 0.;
  M_host[9] = 1.;
  M_host[10] = 0.;
  M_host[11] = H1 - H2;

  M_host[12] = 0.;
  M_host[13] = 0.;
  M_host[14] = 0.;
  M_host[15] = 1.;

  for (int i = 0; i < num_configurations; i++) {
    create_joint_angles(0., -3.14/2.0, 0., 0., 3.14/2.0, 0., thetas_host + (num_joints * i));
  }

  // Copy the screw axes onto the device.
  cudaMemcpy(screw_axes_device, screw_axes_host, sizeof(Screw) * num_joints, cudaMemcpyHostToDevice);

  // Copy M onto the device.
  cudaMemcpy(M_device, M_host, sizeof(float) * Transformation::num_entries_in_matrix, cudaMemcpyHostToDevice);

  // Copy the joint angles onto the device.
  cudaMemcpy(thetas_device, thetas_host, sizeof(float) * num_joints * num_configurations, cudaMemcpyHostToDevice);

  // Screw* screw_axes, float* thetas_device, float* home_position_values, int num_joints, Transformation* result_transformations
  forward_kinematics_kernel<<<10, 10>>>(screw_axes_device, thetas_device, M_device, num_joints, num_configurations, result_transformation_device);

  cudaMemcpy(result_transformation_host, result_transformation_device, sizeof(Transformation) * num_configurations, cudaMemcpyDeviceToHost);

  // Display one of the Transformations.
  result_transformation_host[num_configurations - 1].print();

  // Free the device memory.
  cudaFree(screw_axes_device);
  cudaFree(thetas_device);
  cudaFree(result_transformation_device);
  cudaFree(M_device);

  // Free the host memory.
  free(screw_axes_host);
  free(result_transformation_host);
  free(M_host);

  return 0;
}

Overwriting main.cu


In [54]:
%%script bash
nvcc main.cu -o forward_kinematics

    return &result;
           ^


    return &result;
           ^


    int inputs_index = threadIdx.x;
        ^

    int result_index = threadIdx.x;
        ^



In [55]:
%%script bash
./forward_kinematics

0
-6.34028e-07, -1, 0.000796258, 0.0957158
1, 0, 0.000796258, 0.109065
-0.000796258, 0.000796258, 0.999999, 0.987924
0, 0, 0, 1
