In [1]:
%%writefile lab/code1.cpp
//==============================================================
// Copyright © Intel Corporation
//
// SPDX-License-Identifier: MIT
// =============================================================
#include <sycl/sycl.hpp>

using namespace sycl;

#include <CL/sycl.hpp>
#include <iostream>
#include <CL/sycl.hpp>
#include <CL/sycl.hpp>
#include <iostream>


#include <CL/sycl.hpp>
#include <chrono>
#include <iostream>
#include <thread>
#include <cstdlib>

// Problem dimensions, usable as compile-time array bounds.
constexpr int n = 3;  // feature count (columns of the data matrix)
constexpr int m = 5;  // data-point count (rows of the data matrix)

using namespace sycl;

// Define matrix-vector multiplication function with transpose using SYCL
void matrix_vector_multiply_transpose(const float X[], float X_T[], const float w[], float res[], int m, int n, queue &q) {
    auto R = range<2>(m, n);
    auto A = buffer<float, 2>(X, R);  // Buffer for input matrix X
    auto AT = buffer<float, 2>(X_T, R);  // Buffer for transpose
    auto W = buffer<float, 1>(w, range<1>(n));
    auto Result = buffer<float, 1>(res, range<1>(m));

    q.submit([&](handler &cgh) {
        auto X_acc = A.get_access<access::mode::read>(cgh);
        auto X_T_acc = AT.get_access<access::mode::write>(cgh);
        auto w_acc = W.get_access<access::mode::read>(cgh);
        auto res_acc = Result.get_access<access::mode::write>(cgh);

        cgh.parallel_for(R, [=](id<2> index) {
            int i = index[0];
            int j = index[1];
            X_T_acc[index] = X_acc[cl::sycl::id<2>(j, i)]; // Transpose matrix
        });
    });
    q.wait();
}

/// Element-wise in-place subtraction using SYCL: res2[i] -= vec1[i].
///
/// @param vec1  Vector to subtract (m floats, read only).
/// @param res2  Vector updated in place (m floats).
/// @param m     Number of elements in both vectors.
/// @param q     Queue the kernel is submitted to.
///
/// Does nothing when m is not positive. The updated values are visible in
/// res2 when the function returns.
void subtract_vectors(const float vec1[], float res2[], int m, queue &q) {
    if (m <= 0) {
        return;  // nothing to do; also avoids creating zero-sized buffers
    }

    auto R = range<1>(m);
    auto V1 = buffer<float, 1>(vec1, R);
    auto Res = buffer<float, 1>(res2, R);

    q.submit([&](handler &cgh) {
        auto v1_acc = V1.get_access<access::mode::read>(cgh);
        // `-=` reads the old value before writing the new one, so the
        // accessor must be read_write rather than write-only.
        auto res_acc = Res.get_access<access::mode::read_write>(cgh);

        cgh.parallel_for(R, [=](id<1> index) {
            res_acc[index] -= v1_acc[index];
        });
    });
    q.wait();
}

// Calculate the norm of a vector using SYCL
float calculate_vector_norm(const float v[], int n, queue &q) {
    auto R = range<1>(n);
    auto V = buffer<float, 1>(v, R);

    auto result = buffer<float, 1>(range<1>(1));

    q.submit([&](handler &cgh) {
        auto v_acc = V.get_access<access::mode::read>(cgh);
        auto result_acc = result.get_access<access::mode::discard_write>(cgh);

        cgh.parallel_for(R, [=](id<1> index) {
            result_acc[0] += v_acc[index] * v_acc[index];
        });
    });
    q.wait();

    float v_norm = 0.0f;
    auto result_acc = result.get_access<access::mode::read>();
    v_norm = std::sqrt(result_acc[0]);

    return v_norm;
}

/// Runs gradient descent for logistic regression on a fixed m x n data set
/// and prints the weight vector after every iteration.
///
/// Per iteration:
///   1. res      = X * w                (matrix_vector_multiply_transpose)
///   2. res      = sigmoid(res)         (on the host)
///   3. res      = res - Y              (subtract_vectors)
///   4. gradient = X^T * res            (matrix_vector_multiply_transpose)
///   5. w       -= alpha * gradient
/// The loop stops once the squared gradient norm is at most epsilon_sq, or
/// the iteration counter has reached max_iterations.
///
/// @param q  Queue passed to every SYCL helper.
void performComputation(queue &q) {
    float X[m * n] = {
        1.0f, 2.0f, 3.0f,
        4.0f, 5.0f, 6.0f,
        7.0f, 8.0f, 9.0f,
        10.0f, 11.0f, 12.0f,
        13.0f, 14.0f, 15.0f
    };

    float X_T[n * m] = {0.0f};        // receives the n x m transpose of X
    float X_scratch[m * n] = {0.0f};  // transpose output of the gradient call (unused)
    float w[n] = {0.1f, 0.1f, 0.1f};
    float res[m] = {0.0f};
    float Y[m] = {0.0f, 1.0f, 0.0f, 1.0f, 1.0f};
    float gradient[n] = {0.0f};

    float alpha = 0.05f;  // Learning rate
    int max_iterations = 10;
    int iteration = 0;
    float epsilon_sq = 0.001f * 0.001f;

    while (1) {
        // Forward pass: res = X * w (also refreshes X_T).
        matrix_vector_multiply_transpose(X, X_T, w, res, m, n, q);

        for (int i = 0; i < m; i++) {
            res[i] = 1.0f / (1.0f + std::exp(-res[i]));
        }

        // Prediction error: res = sigmoid(X * w) - Y. This has to happen
        // before the gradient is formed, because the gradient is built from it.
        subtract_vectors(Y, res, m, q);

        // gradient = X^T * res. X_T is passed as the n x m input matrix, so
        // the result has n entries and fits `gradient` exactly.
        matrix_vector_multiply_transpose(X_T, X_scratch, res, gradient, n, m, q);

        for (int i = 0; i < n; i++) {
            w[i] -= alpha * gradient[i];
        }

        // Print the updated weight vector
        std::cout << "Iteration " << iteration << " - Updated Weight Vector (w*):\n";
        for (int i = 0; i < n; i++) {
            std::cout << w[i] << " ";
        }
        std::cout << std::endl;

        // Calculate the gradient norm
        float grad_norm = calculate_vector_norm(gradient, n, q);

        // Check the termination conditions. The check runs after the update,
        // so the body executes for iteration = 0 .. max_iterations inclusive.
        if (grad_norm * grad_norm <= epsilon_sq || iteration >= max_iterations) {
            break;
        }

        iteration++;
    }
}

/// Benchmark driver: for each core count from 1 to 8, exports that count as
/// OMP_NUM_THREADS, then times queue creation plus one performComputation run
/// and prints the elapsed wall-clock time.
///
/// NOTE(review): whether the SYCL runtime behind `queue` honours
/// OMP_NUM_THREADS is not visible in this file - confirm before trusting the
/// timings as a scaling measurement.
///
/// @return 0 on success, 1 if the environment variable could not be set.
int main() {
    using Clock = std::chrono::high_resolution_clock;

    const int maxCores = 8;  // highest core count to test
    const std::string envVarName = "OMP_NUM_THREADS";

    int numCores = 1;
    while (numCores <= maxCores) {
        std::cout << "Number of CPU cores: " << numCores << std::endl;

        // Publish the requested core count through the environment,
        // overwriting any previous value.
        const std::string envVarValue = std::to_string(numCores);
        const int rc = setenv(envVarName.c_str(), envVarValue.c_str(), 1);
        if (rc != 0) {
            std::cerr << "Error setting environment variable." << std::endl;
            return 1;
        }

        const auto start = Clock::now();

        // Default-constructed queue (no in-order property); its construction
        // is deliberately inside the timed region.
        queue q;
        performComputation(q);

        const auto end = Clock::now();
        const double seconds = std::chrono::duration<double>(end - start).count();
        std::cout << "Time: " << seconds << " seconds" << std::endl;

        ++numCores;
    }

    return 0;
}






















   





Writing lab/code1.cpp


In [None]:
! chmod 755 q; chmod 755 run_simple2.sh;if [ -x "$(command -v qsub)" ]; then ./q run_simple2.sh; else ./run_simple2.sh; fi