In [1]:
#importing libraries
import numpy as np
import pandas as pd
from numba import jit, njit

In [2]:
# Read the admissions table from disk and preview its first rows.
admissions_csv = 'datasets/admissions.csv'
df = pd.read_csv(admissions_csv)
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [3]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape

(500, 9)

In [None]:
#generate a random data set
def generate_dataset(row=1000, column=5, mean=50, std=100, seed=None):
    """Build a DataFrame filled with normally distributed random values.

    Parameters
    ----------
    row : int
        Number of rows to generate.
    column : int
        Number of columns to generate.
    mean : float
        Centre of the normal distribution (default 50).
    std : float
        Standard deviation of the normal distribution (default 100).
    seed : int or None
        When given, a dedicated generator seeded with this value is used so
        the result is reproducible. When None, NumPy's global random state is
        used, which is the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        Frame of shape (row, column) with default integer labels.
    """
    # Both the np.random module and a Generator expose normal(loc, scale, size).
    rng = np.random if seed is None else np.random.default_rng(seed)
    return pd.DataFrame(rng.normal(mean, std, size=(row, column)))

# Fixed seed so the preview is identical after Restart & Run All.
generate_dataset(seed=0).head()

In [4]:
def gpu_create_bet(df):
    """Build the BET: a table of summary statistics for every pair of columns.

    Despite the name, nothing here runs on a GPU. The statistics are plain
    column sums and cross products, so they are computed with vectorised
    NumPy operations instead of a JIT kernel that was recompiled on each call.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric data, one column per feature. Values are converted to float.

    Returns
    -------
    pandas.DataFrame
        Square frame labelled by ``list(df)`` on both axes. Each cell is a
        list of 12 floats describing the feature pair ``(first, second)``::

            [n, sum(first), sum(first**2), sum(first**3), sum(first**4),
             n, sum(second), sum(second**2), sum(second**3), sum(second**4),
             sum(first*second), sum((first*second)**2)]

        For cells on or above the diagonal ``first`` is the row feature and
        ``second`` the column feature. Cells below the diagonal mirror the
        cell above it, so there ``first`` is the *column* feature.
    """
    features = list(df)
    n_features = len(features)
    values = np.asarray(df.values, dtype=float)
    n_obs = float(values.shape[0])

    squares = values * values
    # Per-feature power sums for powers 1..4 (one list per power).
    power_sums = [
        values.sum(axis=0).tolist(),
        squares.sum(axis=0).tolist(),
        (squares * values).sum(axis=0).tolist(),
        (squares * squares).sum(axis=0).tolist(),
    ]
    # sum(x*y) for every pair, and sum((x*y)**2) == sum(x**2 * y**2).
    cross = np.dot(values.T, values).tolist()
    cross_sq = np.dot(squares.T, squares).tolist()

    cells = np.empty((n_features, n_features), dtype=object)
    for row in range(n_features):
        for column in range(n_features):
            # Lower-triangle cells reuse the statistics of the mirrored
            # upper-triangle cell, but get their own list (no aliasing).
            first, second = (row, column) if row <= column else (column, row)
            cells[row, column] = (
                [n_obs] + [sums[first] for sums in power_sums]
                + [n_obs] + [sums[second] for sums in power_sums]
                + [cross[first][second], cross_sq[first][second]]
            )

    return pd.DataFrame(cells, index=features, columns=features)
    
# Build the BET for every column of df; the result is labelled by the
# column names on both axes, so its shape is (n_columns, n_columns).
BET = gpu_create_bet(df)
print(BET.shape)

(9, 9)


In [5]:
def gpu_compute_covariance(BET):
    """Derive the covariance matrix from a BET.

    Despite the name, nothing here runs on a GPU; the computation is a few
    vectorised NumPy operations over the stacked BET cells.

    Each BET cell is read as a sequence in which index 0 is the observation
    count ``n``, index 1 the sum of the first feature, index 6 the sum of the
    second feature and index 10 the sum of their products. The population
    covariance is then ``(sum_xy - sum_x * sum_y / n) / n``.

    Parameters
    ----------
    BET : pandas.DataFrame
        Square frame whose cells are equal-length lists of statistics, as
        built by ``gpu_create_bet``.

    Returns
    -------
    pandas.DataFrame
        Covariance matrix labelled by ``list(BET)`` on both axes. A cell whose
        observation count is 0 yields NaN.
    """
    features = list(BET)
    # Stack the per-cell lists into one (n_features, n_features, k) array.
    bet_array = np.array(BET.values.tolist(), dtype=float)
    if bet_array.size == 0:
        # Nothing to compute for an empty BET.
        return pd.DataFrame(index=features, columns=features, dtype=float)

    count = bet_array[..., 0]
    sum_first = bet_array[..., 1]
    sum_second = bet_array[..., 6]
    sum_product = bet_array[..., 10]

    # Zero counts are mapped to NaN below, so silence the division warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        covariance = (sum_product - (sum_first * sum_second) / count) / count
    covariance = np.where(count != 0, covariance, np.nan)

    return pd.DataFrame(covariance, index=features, columns=features)

# Display the covariance matrix derived from the BET.
gpu_compute_covariance(BET)

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
Serial No.,20833.25,-169.12,-124.262,-11.153,-19.627,-0.493,-6.47872,-0.382,0.17309
GRE Score,-169.12,127.325216,56.711376,8.190192,6.853472,5.473552,5.63066,3.15568,1.289279
TOEFL Score,-124.262,56.711376,36.915136,4.510112,3.876192,3.042072,2.975644,1.40848,0.678686
University Rating,-11.153,8.190192,4.510112,1.305004,0.823364,0.642824,0.486786,0.24216,0.111162
SOP,-19.627,6.853472,3.876192,0.823364,0.980124,0.607484,0.425991,0.20056,0.095499
LOR,-0.493,5.473552,3.042072,0.642824,0.607484,0.854744,0.356093,0.17096,0.084128
CGPA,-6.47872,5.63066,2.975644,0.486786,0.425991,0.356093,0.365067,0.150354,0.075175
Research,-0.382,3.15568,1.40848,0.24216,0.20056,0.17096,0.150354,0.2464,0.038206
Chance of Admit,0.17309,1.289279,0.678686,0.111162,0.095499,0.084128,0.075175,0.038206,0.019881


In [6]:
#Matrix multiplication compiled by numba's njit (a CPU JIT; it does not run on a GPU)
@njit(parallel=True)
def _matrix_multiplication(x, y):
    """Return ``np.dot(x, y)`` from numba-compiled code."""
    return np.dot(x, y)

#Matrix inversion compiled by numba's njit (a CPU JIT; it does not run on a GPU)
@njit(parallel=True)
def _matrix_inverse(x):
    """Return ``np.linalg.inv(x)`` from numba-compiled code."""
    return np.linalg.inv(x)

In [43]:
#Multi-Linear Regression

def multi_linear_regression(BET=None, target='', return_intercept=False):
    """Fit a multiple linear regression from BET summary statistics.

    The coefficients solve ``cov(X, X) @ beta = cov(X, y)``, where the
    covariances come from ``gpu_compute_covariance``. The intercept is
    ``mean(y) - beta . mean(X)``.

    Parameters
    ----------
    BET : pandas.DataFrame
        Square BET as built by ``gpu_create_bet``. It is not modified.
    target : str
        Label of the column to predict. It must match a BET label exactly.
    return_intercept : bool
        When True, return ``(intercept, coefficients)`` instead of only the
        coefficients.

    Returns
    -------
    numpy.ndarray or tuple
        Coefficients for every non-target feature, in BET column order; or
        ``(intercept, coefficients)`` when ``return_intercept`` is True.

    Raises
    ------
    ValueError
        If ``BET`` is None.
    KeyError
        If ``target`` is not one of the BET labels.
    """
    if BET is None:
        raise ValueError("BET must be provided")

    features = list(BET)
    if target not in features:
        raise KeyError("target %r is not a BET feature" % (target,))
    target_idx = features.index(target)

    # Compute the full covariance matrix once, then split off the target.
    cov_all = np.asarray(gpu_compute_covariance(BET).values, dtype=float)
    cov_features = np.delete(np.delete(cov_all, target_idx, axis=0), target_idx, axis=1)
    cov_target = np.delete(cov_all[target_idx], target_idx)

    # The numba-compiled helpers work on contiguous float arrays.
    cov_inverse = _matrix_inverse(np.ascontiguousarray(cov_features))
    Beta_array = _matrix_multiplication(cov_inverse, np.ascontiguousarray(cov_target))

    # Read every mean from the diagonal cell (feature paired with itself):
    # index 1 is the feature's sum and index 0 the observation count. Reading
    # from the target's row instead is only correct when the target is the
    # last column, because cells right of the diagonal hold the target's own
    # sum at index 1.
    bet_array = np.array(BET.values.tolist(), dtype=float)
    positions = np.arange(len(features))
    diagonal = bet_array[positions, positions]
    means = diagonal[:, 1] / diagonal[:, 0]
    mean_target = means[target_idx]
    mean_X = np.ascontiguousarray(np.delete(means, target_idx))

    b0 = float(mean_target - _matrix_multiplication(Beta_array, mean_X))

    print(b0)
    if return_intercept:
        return b0, Beta_array
    return Beta_array

# The target string is passed as-is, trailing space included.
# The call prints the intercept and displays the returned coefficient array.
multi_linear_regression(BET.copy(), target='Chance of Admit ')

-1.3379983470495462


array([  8.68010696e-05,   1.92167454e-03,   3.19276146e-03,
         5.31642721e-03,   4.56606127e-03,   1.49151378e-02,
         1.15556128e-01,   2.25254076e-02])