In [1]:
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git
# !pip install -q kaggle

Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git
  Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-2iv86d3i
  Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-2iv86d3i
  Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit 0a71d56e5dce3ff1f0dd2c47c29367629262f527
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: NVCCPlugin
  Building wheel for NVCCPlugin (setup.py) ... [?25l[?25hdone
  Created wheel for NVCCPlugin: filename=NVCCPlugin-0.0.2-py3-none-any.whl size=4293 sha256=ff63a332a2d85a8b7cb56534b1dd9a74e4971aca2a515a028b3d52c990bdd57a
  Stored in directory: /tmp/pip-ephem-wheel-cache-r6z2zzf_/wheels/a8/b9/18/23f8ef71ceb0f63297dd1903aedd067e6243a68ea756d6feea
Successfully built NVCCPlugin
Installing collected packages: NVCCPlugin
Successfully installed NVCCPlugin-0.0.2


In [2]:
# import urllib.request
# url = "https://raw.githubusercontent.com/ben-strasser/fast-cpp-csv-parser/master/csv.h"
# urllib.request.urlretrieve(url, "csv.h")

In [3]:
# Load the nvcc_plugin IPython extension from the NVCCPlugin package installed
# in the first cell. Note: the CUDA program in this notebook is compiled by
# calling `nvcc` through the shell, not through the cell magics of this plugin.
%load_ext nvcc_plugin

created output directory at /content/src
Out bin /content/result.out


In [40]:
helper = '''
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <cuda_runtime.h>
#define BLOCKSIZE 1024
#define GRIDSIZE 65535
using namespace std;

// Accumulates x[k] / n for every k in [0, n) into *ret using atomicAdd.
// *ret ends up holding the arithmetic mean of x provided it holds 0 when the
// kernel is launched. Each thread starts at its global thread id and advances
// by the total number of launched threads, so any 1-D launch shape covers
// the whole array.
__global__
void mean(float *x, float *ret, int n)
{
  const int step = gridDim.x * blockDim.x;
  int k = blockIdx.x * blockDim.x + threadIdx.x;
  while (k < n) {
    atomicAdd(ret, x[k] / n);
    k += step;
  }
}

// Number of blocks of the running std_dev launch that have finished adding
// their contributions. The last block to arrive wraps it back to 0 (see the
// atomicInc below), so every launch starts from a clean counter.
// Assumes std_dev launches do not execute concurrently (e.g. they are issued
// on the same stream).
__device__ unsigned int std_dev_blocks_done = 0;

// Computes the population standard deviation of x[0..n) around `mean`.
//
//   x    input samples (device-accessible)
//   ret  accumulator; must hold 0 at launch, holds the standard deviation
//        once the kernel has completed
//   mean precomputed mean of x
//   n    number of samples
//
// Every thread adds (x[i] - mean)^2 / n into *ret. The square root must only
// be taken after ALL blocks have finished adding; __syncthreads() cannot
// guarantee that because it only synchronises the threads of one block.
// Instead each block takes a ticket from a global counter when it is done,
// and the block that draws the last ticket finalises the result.
__global__
void std_dev(float *x, float *ret, float mean, int n)
{
  __shared__ bool is_last_block;

  int index = blockDim.x * blockIdx.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
  for(int i = index; i < n; i+= stride){
    float diff = x[i] - mean;
    atomicAdd(ret, diff * diff / n);
  }

  // Order this thread's additions before the block signals completion, then
  // wait until every thread of the block has reached this point.
  __threadfence();
  __syncthreads();

  if(threadIdx.x == 0){
    // atomicInc returns the old value and wraps to 0 once the old value
    // reaches gridDim.x - 1, i.e. exactly when the last block arrives.
    unsigned int arrived = atomicInc(&std_dev_blocks_done, gridDim.x - 1);
    is_last_block = (arrived == gridDim.x - 1);
  }
  __syncthreads();

  if(is_last_block && threadIdx.x == 0){
    // Read the finished sum through an atomic so the value comes from the
    // same place the other blocks' atomicAdds were applied.
    float variance = atomicAdd(ret, 0.0f);
    *ret = sqrtf(variance);
  }
}

// Standardises x in place: every element x[k], k in [0, n), is replaced by
// (x[k] - mean) / std. Each thread starts at its global thread id and
// advances by the total number of launched threads.
__global__
void normalize(float *x, float std, float mean, int n)
{
  const int step = gridDim.x * blockDim.x;
  int k = blockDim.x * blockIdx.x + threadIdx.x;
  while (k < n) {
    const float centered = x[k] - mean;
    x[k] = centered / std;
    k += step;
  }
}

// Accumulates x[k] * y[k] / n for every k in [0, n) into *ret using
// atomicAdd. *ret must hold 0 when the kernel is launched. When x and y have
// already been standardised, the accumulated value is their correlation
// coefficient.
__global__
void corr(float *x, float *y, float* ret, int n)
{
  const int step = gridDim.x * blockDim.x;
  int k = blockDim.x * blockIdx.x + threadIdx.x;
  while (k < n) {
    atomicAdd(ret, x[k] * y[k] / n);
    k += step;
  }
}

// Reads two numeric columns of a comma-separated file into x and y.
//
//   x, y      output arrays with room for at least N floats each
//   N         number of data rows to read
//   xCol      zero-based index of the column stored in x
//   yCol      zero-based index of the column stored in y
//   numCol    number of columns per row; cells beyond it are ignored
//   filename  path of the CSV file
//
// The first line is treated as a header and skipped; empty lines are skipped.
// Columns are located per line, so x and y of one row always land at the same
// index regardless of which of the two columns comes first in the file.
//
// Returns 0 when exactly N rows were stored. Returns 1 (after printing a
// message to std::cerr) when a column index is out of range, the file cannot
// be opened, a needed cell is missing or not numeric, or the file holds fewer
// than N data rows.
int read_csv(float *x, float *y, int N, int xCol, int yCol, int numCol, std::string filename) {
  if (xCol < 0 || yCol < 0 || numCol <= xCol || numCol <= yCol) {
    std::cerr << "Invalid column index for file: " << filename << std::endl;
    return 1;
  }

  std::ifstream file(filename);
  if (!file.is_open()) {
    std::cerr << "Error opening file: " << filename << std::endl;
    return 1;
  }

  std::string line;
  // Read and ignore the first line (header)
  std::getline(file, line);

  int idx = 0;
  while (idx < N && std::getline(file, line)) {
    if (line.empty()) {
      continue;
    }

    std::stringstream lineStream(line);
    std::string cell;
    float xValue = 0.0f;
    float yValue = 0.0f;
    bool haveX = false;
    bool haveY = false;

    try {
      for (int col = 0; col < numCol && std::getline(lineStream, cell, ','); ++col) {
        if (col == xCol) {
          xValue = static_cast<float>(std::stod(cell));
          haveX = true;
        }
        if (col == yCol) {
          yValue = static_cast<float>(std::stod(cell));
          haveY = true;
        }
      }
    } catch (const std::exception &) {
      // std::stod throws when the cell is not a number or is out of range.
      std::cerr << "Error parsing data row " << (idx + 1) << " of file: " << filename << std::endl;
      return 1;
    }

    if (!haveX || !haveY) {
      std::cerr << "Missing column in data row " << (idx + 1) << " of file: " << filename << std::endl;
      return 1;
    }

    // Store both values only once the whole row has been parsed, so x and y
    // can never get out of step and idx never runs past N - 1.
    x[idx] = xValue;
    y[idx] = yValue;
    ++idx;
  }

  if (idx < N) {
    // The caller uses N as the sample count, so a short file must not pass
    // silently with the tail of x and y left unset.
    std::cerr << "Expected " << N << " data rows but found " << idx << " in file: " << filename << std::endl;
    return 1;
  }
  return 0;
}
'''

main = '''
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include "helper.h"
using namespace std;

// Prints a diagnostic for a failed CUDA runtime call.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_ok(cudaError_t status, const char *what)
{
  if (status == cudaSuccess) {
    return true;
  }
  std::cerr << "CUDA error while " << what << ": "
            << cudaGetErrorString(status) << std::endl;
  return false;
}

// Waits for all queued kernels and reports launch or execution errors.
static bool kernels_ok(const char *stage)
{
  return cuda_ok(cudaGetLastError(), stage) &&
         cuda_ok(cudaDeviceSynchronize(), stage);
}

// Fits a least-squares line to two columns of the California housing sample
// on the GPU: means -> standard deviations -> standardise -> correlation,
// then derives slope and intercept on the host.
// Returns 0 on success and 1 when loading the data or any CUDA step fails.
int main(void)
{
  const int N = 15000;
  const int threads_per_block = BLOCKSIZE;
  // One thread per sample is enough; the kernels loop with a stride of the
  // total thread count, so capping the grid at GRIDSIZE stays correct.
  int blocks_per_grid = (N + threads_per_block - 1) / threads_per_block;
  if (blocks_per_grid > GRIDSIZE) {
    blocks_per_grid = GRIDSIZE;
  }

  float *x = nullptr, *y = nullptr, *corr_con = nullptr;
  float *x_mean = nullptr, *y_mean = nullptr, *x_std = nullptr, *y_std = nullptr;

  // Frees every buffer; safe on partially allocated state because freeing a
  // null pointer is a no-op.
  auto release = [&]() {
    cudaFree(x);
    cudaFree(y);
    cudaFree(x_mean);
    cudaFree(y_mean);
    cudaFree(x_std);
    cudaFree(y_std);
    cudaFree(corr_con);
  };

  const bool allocated =
      cuda_ok(cudaMallocManaged(&x, N * sizeof(float)), "allocating x") &&
      cuda_ok(cudaMallocManaged(&y, N * sizeof(float)), "allocating y") &&
      cuda_ok(cudaMallocManaged(&x_mean, sizeof(float)), "allocating x_mean") &&
      cuda_ok(cudaMallocManaged(&y_mean, sizeof(float)), "allocating y_mean") &&
      cuda_ok(cudaMallocManaged(&x_std, sizeof(float)), "allocating x_std") &&
      cuda_ok(cudaMallocManaged(&y_std, sizeof(float)), "allocating y_std") &&
      cuda_ok(cudaMallocManaged(&corr_con, sizeof(float)), "allocating corr_con");
  if (!allocated) {
    release();
    return 1;
  }

  // The kernels accumulate into these with atomicAdd, and freshly allocated
  // memory is not guaranteed to be cleared, so start every sum at zero.
  *x_mean = 0.0f;
  *y_mean = 0.0f;
  *x_std = 0.0f;
  *y_std = 0.0f;
  *corr_con = 0.0f;

  // Arr 1 to fill, Arr 2 to fill, size of arrs, col1 idx, col2 idx,
  // number of columns, and name of file (column indexes are zero-based)
  std::string filename = "/content/sample_data/california_housing_train.csv";
  int fail = read_csv(x, y, N, 7, 8, 9, filename);
  if(fail){
    release();
    return 1;
  }

  mean<<<blocks_per_grid, threads_per_block>>>(x, x_mean, N);
  mean<<<blocks_per_grid, threads_per_block>>>(y, y_mean, N);
  // The host dereferences the means below, so wait for the kernels first.
  if (!kernels_ok("computing the means")) {
    release();
    return 1;
  }

  std_dev<<<blocks_per_grid, threads_per_block>>>(x, x_std, *x_mean, N);
  std_dev<<<blocks_per_grid, threads_per_block>>>(y, y_std, *y_mean, N);
  if (!kernels_ok("computing the standard deviations")) {
    release();
    return 1;
  }

  if (*x_std == 0.0f || *y_std == 0.0f) {
    // A constant column cannot be standardised (division by zero).
    std::cerr << "Cannot fit a regression line: a column has zero standard deviation." << std::endl;
    release();
    return 1;
  }

  normalize<<<blocks_per_grid, threads_per_block>>>(x, *x_std, *x_mean, N);
  normalize<<<blocks_per_grid, threads_per_block>>>(y, *y_std, *y_mean, N);
  if (!kernels_ok("normalizing the data")) {
    release();
    return 1;
  }

  corr<<<blocks_per_grid, threads_per_block>>>(x, y, corr_con, N);
  if (!kernels_ok("computing the correlation")) {
    release();
    return 1;
  }

  // With r the correlation of the standardised data:
  //   slope     = r * y_std / x_std
  //   intercept = y_mean - slope * x_mean  (written below in factored form)
  float y_cept = y_std[0] * (-1 * (*corr_con) * (x_mean[0] / x_std[0]) + y_mean[0] / y_std[0]);
  float slope = (*corr_con) * y_std[0] / x_std[0];

  cout << "The slope of the regression line is " << slope << " and the";
  cout << " y-intercept is " << y_cept << " and the r_value is " << corr_con[0];
  cout << "." << endl;

  release();
  return 0;
}'''

# Write the CUDA program and its header to disk so nvcc can compile them in
# the next cell. `with` closes each file even if the write raises.
with open("lin_reg.cu", "w") as source_file:
    source_file.write(main)
with open("helper.h", "w") as header_file:
    header_file.write(helper)

In [41]:
!nvcc lin_reg.cu -o lin_reg
!./lin_reg

The slope of the regression line is 41710.1 and the y-intercept is 40082.1 and the r_value is 0.708215.


In [7]:
# Run the compiled binary under the nvprof profiler.
# NOTE(review): no profiler output was captured for this cell. nvprof may not
# be available or supported on the GPU of this runtime - confirm, and consider
# `nsys profile ./lin_reg` as a replacement.
!nvprof ./lin_reg



In [33]:
import pandas as pd
from scipy.stats import linregress

# CPU reference for the CUDA result above: fit the same rows and columns of
# the same CSV file with scipy and print the fit in the same wording.
file_path = '/content/sample_data/california_housing_train.csv'
df = pd.read_csv(file_path)

# Keep the first 15000 rows and only the last two columns of the frame.
selected_data = df.iloc[:15000, -2:]
x_values = selected_data.iloc[:, 0]  # second-to-last column: predictor
y_values = selected_data.iloc[:, 1]  # last column: response

# Ordinary least-squares fit of y_values against x_values.
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)

# Report slope, intercept and correlation coefficient, two decimals each.
message = (
    f"The slope of the regression line is {slope:.2f} "
    f"and the intercept is {intercept:.2f} "
    f"and the r_value is {r_value:.2f}."
)
print(message)

The slope of the regression line is 41710.12 and the intercept is 40082.47 and the r_value is 0.71.
