In [2]:
from numba import njit
import numpy as np
import pandas as pd
import time

# Show BET cells in full (each cell holds a 12-element sequence). None means
# "no width limit"; the former -1 sentinel is deprecated and rejected by
# current pandas releases.
pd.set_option('display.max_colwidth', None)

In [48]:
%%time
# Synthetic benchmark data: random integers in [10, 25), stored as float64.
# NOTE: the (1000000, 1000) float64 result alone occupies 8e9 bytes (~8 GB),
# in addition to the temporary integer array produced by randint.
# NOTE(review): no random seed is set in the visible cells, so the values
# differ from run to run.
input_matrix = np.random.randint(10, 25, size=(1000000, 1000)).astype(np.float64) 
# Last expression of the cell: the matrix shape as (rows, cols).
input_matrix.shape

Wall time: 18.8 s


In [49]:
%%time
# Another CPU iteration.
# If the input is a DataFrame instead of a NumPy array, uncomment the lines below and comment out the `def` line that takes `input_matrix`.
#def vectorize_cpu_bet(dataframe=None):
#
#    input_matrix = dataframe.values.astype(np.float64)
#    features = list(dataframe)

def vectorize_cpu_bet(input_matrix, features=None):
    """Build the Basic Element Table (BET) of a 2-D matrix with matrix products.

    Parameters
    ----------
    input_matrix : array-like, shape (rows, cols)
        One observation per row, one feature per column. It is converted to
        float64 before any arithmetic so that integer input cannot overflow
        when fourth powers are accumulated.
    features : sequence, optional
        Row labels (DataFrame index) of the result, one per column of
        ``input_matrix``. Defaults to ``range(cols)`` of the matrix actually
        passed in.

    Returns
    -------
    pandas.DataFrame
        ``cols x cols`` table. Cell ``(i, j)`` is a 12-element list of floats::

            [n, sum(x_i), sum(x_i^2), sum(x_i^3), sum(x_i^4),
             n, sum(x_j), sum(x_j^2), sum(x_j^3), sum(x_j^4),
             sum(x_i * x_j), sum(x_i^2 * x_j^2)]

        where ``x_i`` is column ``i`` and ``n`` is the number of rows.

    Raises
    ------
    ValueError
        If ``input_matrix`` is not two-dimensional.
    """
    precision = np.float64
    input_matrix = np.asarray(input_matrix, dtype=precision)
    if input_matrix.ndim != 2:
        raise ValueError(
            "input_matrix must be 2-D, got %d-D" % input_matrix.ndim
        )
    rows, cols = input_matrix.shape

    # Resolve the default per call. A default written in the signature would be
    # evaluated once, at definition time, against whatever global happened to
    # be named `input_matrix`.
    if features is None:
        features = range(cols)

    input_matrix_sq = np.square(input_matrix)

    # Pairwise cross terms: entry (i, j) is sum(x_i * x_j) and
    # sum(x_i^2 * x_j^2) respectively.
    single_mul_arr = np.matmul(input_matrix.T, input_matrix)
    double_mul_arr = np.matmul(input_matrix_sq.T, input_matrix_sq)

    # Per-feature column vectors of shape (cols, 1).
    length_arr = np.full((cols, 1), rows, dtype=precision)
    sum_arr = input_matrix.sum(axis=0).reshape(cols, 1)
    double_arr = np.diag(single_mul_arr).reshape(cols, 1)
    # Only the diagonal of (X^2)^T X is needed, so sum x^2 * x column-wise
    # instead of forming the full cols x cols product.
    triple_arr = np.einsum('ij,ij->j', input_matrix_sq, input_matrix).reshape(cols, 1)
    quad_arr = np.diag(double_mul_arr).reshape(cols, 1)

    # Column vectors broadcast along rows (statistics of feature i); their
    # transposes broadcast along columns (statistics of feature j).
    stack = np.dstack(np.broadcast_arrays(
        length_arr, sum_arr, double_arr, triple_arr, quad_arr,
        length_arr.T, sum_arr.T, double_arr.T, triple_arr.T, quad_arr.T,
        single_mul_arr, double_mul_arr,
    )).tolist()

    # A nested list makes pandas store one 12-element list per cell.
    dataframe = pd.DataFrame(stack, index=features)

    return dataframe
    
# Build the BET for the benchmark matrix with the vectorized implementation.
df = vectorize_cpu_bet(input_matrix)
# Table dimensions: one row and one column per feature.
print(df.shape)
# Number of basic elements stored in a single cell (label lookup: row 0, column 0).
print(len(df.loc[0,0]))

(1000, 1000)
12
Wall time: 4min 37s


In [46]:
%%time
def create_bet(df, col):
    """Construct the Basic Element Table (BET) with explicit per-feature loops.

    The BET is the key step for ARTML and it can be updated with new data.

    Parameters
    ----------
    df : array-like, shape (rows, n_columns)
        Numeric data, one feature per column. It is converted to a float64
        NumPy array before use.
    col : sized sequence
        Feature labels, used for both the index and the columns of the result.
        Only the first ``len(col)`` columns of ``df`` are processed.

    Returns
    -------
    pandas.DataFrame
        ``len(col) x len(col)`` table. Cell ``(i, j)`` is a 12-element float
        array::

            [n, sum(x_i), sum(x_i^2), sum(x_i^3), sum(x_i^4),
             n, sum(x_j), sum(x_j^2), sum(x_j^3), sum(x_j^4),
             sum(x_i * x_j), sum((x_i * x_j)^2)]

    Notes
    -----
    Prints the elapsed wall-clock time (seconds) before returning.

    see 'Real Time Data Mining' by Prof. Sayad
    (https://www.researchgate.net/publication/265619432_Real_Time_Data_Mining)
    """
    start = time.time()
    data = np.asarray(df, dtype=np.float64)
    n_features = len(col)

    # moments[i]  -> count and first four power sums of feature i
    # cross[i, j] -> sum(x_i * x_j) and sum((x_i * x_j)^2); symmetric in i, j
    moments = np.empty((n_features, 5))
    cross = np.zeros((n_features, n_features, 2))

    for i in range(n_features):
        # Extract column i once per outer iteration; a contiguous copy keeps
        # the repeated products in the inner loop cache-friendly.
        col_i = np.ascontiguousarray(data[:, i])
        moments[i] = (
            len(col_i),
            col_i.sum(),
            (col_i ** 2).sum(),
            (col_i ** 3).sum(),
            (col_i ** 4).sum(),
        )

        # Only the upper triangle is computed; the lower one is mirrored.
        for j in range(i, n_features):
            product = col_i * data[:, j]
            cross[i, j] = (product.sum(), (product ** 2).sum())
            cross[j, i] = cross[i, j]

    # Assemble the 12 basic elements of every (i, j) pair by broadcasting:
    # feature-i moments along rows, feature-j moments along columns.
    table = np.empty((n_features, n_features, 12))
    table[:, :, 0:5] = moments[:, np.newaxis, :]
    table[:, :, 5:10] = moments[np.newaxis, :, :]
    table[:, :, 10:12] = cross

    # One dict entry per output column; each entry lists that column's cells
    # from top to bottom, so pandas stores a 12-element array in every cell.
    bet = {i: list(table[:, i]) for i in range(n_features)}

    result = pd.DataFrame(bet, index=col)
    result.columns = col

    end = time.time()
    print(end - start)
    return result
  
# Build the BET with the loop-based reference implementation, labelling the
# features 0 .. cols-1.
bet_df = create_bet(input_matrix, range(0,input_matrix.shape[1]))
# NOTE(review): the saved output for this cell reports (100, 100), which does
# not match the 1000-column matrix generated above; the cells appear to have
# been run out of order against a smaller matrix. Confirm before comparing timings.
print(bet_df.values.shape)

23.181957960128784
(100, 100)
Wall time: 23.2 s


In [47]:
# Largest absolute element-wise gap between the vectorized and loop-based BETs.
# A signed sum of differences can cancel opposite errors and report 0.0 for
# tables that actually differ, so compare magnitudes instead. Both tables hold
# one 12-element sequence per cell; tolist() + np.array unpacks them into
# dense (features, features, 12) float arrays.
np.abs(
    np.array(df.values.tolist(), dtype=np.float64)
    - np.array(bet_df.values.tolist(), dtype=np.float64)
).max()

0.0

In [33]:
# Show the first row of the vectorized BET; each cell is the 12-element list
# for one pair of features.
df.head(1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,"[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 30819669.0, 11683323969.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1698784.0, 30726514.0, 585451114.0, 11625700486.0, 28900310.0, 9460860488.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1699388.0, 30754176.0, 586318832.0, 11649008568.0, 28908027.0, 9466026993.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1700375.0, 30779313.0, 586837679.0, 11659663761.0, 28937935.0, 9489696503.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1700597.0, 30792299.0, 587257997.0, 11670298139.0, 28933467.0, 9483632351.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1699741.0, 30756687.0, 586198303.0, 11642871915.0, 28926349.0, 9481684383.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1700193.0, 30778559.0, 586941693.0, 11664777527.0, 28928978.0, 9483617712.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1699767.0, 30753987.0, 586018725.0, 11635685679.0, 28929678.0, 9484661912.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1701742.0, 30828282.0, 588202396.0, 11694515070.0, 28960188.0, 9504156578.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1702183.0, 30846963.0, 588778387.0, 11710093635.0, 28968014.0, 9508704718.0]",...,"[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1700474.0, 30778562.0, 586704254.0, 11653681178.0, 28942425.0, 9490320937.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1701390.0, 30811346.0, 587582526.0, 11674662866.0, 28944996.0, 9488388044.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1700720.0, 30791276.0, 587138036.0, 11666027804.0, 28941181.0, 9487734399.0]","[100000.0, 1701701.0, 
30819669.0, 587835743.0, 11683323969.0, 100000.0, 1700161.0, 30774809.0, 586786345.0, 11659947977.0, 28931383.0, 9483680775.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1702950.0, 30876450.0, 589627458.0, 11731959666.0, 28985366.0, 9524631430.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1699489.0, 30750763.0, 586136257.0, 11644157083.0, 28921787.0, 9478022565.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1699909.0, 30766013.0, 586557925.0, 11654698685.0, 28921034.0, 9474680316.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1697791.0, 30702137.0, 585059083.0, 11622294425.0, 28895485.0, 9466539901.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1702031.0, 30839037.0, 588504449.0, 11702117217.0, 28965574.0, 9507519246.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1700455.0, 30791117.0, 587345329.0, 11675672057.0, 28934859.0, 9487458189.0]"


In [34]:
# Show the first row of the loop-based BET for a side-by-side comparison with
# the vectorized table.
bet_df.head(1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,"[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 30819669.0, 11683323969.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1698784.0, 30726514.0, 585451114.0, 11625700486.0, 28900310.0, 9460860488.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1699388.0, 30754176.0, 586318832.0, 11649008568.0, 28908027.0, 9466026993.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1700375.0, 30779313.0, 586837679.0, 11659663761.0, 28937935.0, 9489696503.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1700597.0, 30792299.0, 587257997.0, 11670298139.0, 28933467.0, 9483632351.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1699741.0, 30756687.0, 586198303.0, 11642871915.0, 28926349.0, 9481684383.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1700193.0, 30778559.0, 586941693.0, 11664777527.0, 28928978.0, 9483617712.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1699767.0, 30753987.0, 586018725.0, 11635685679.0, 28929678.0, 9484661912.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1701742.0, 30828282.0, 588202396.0, 11694515070.0, 28960188.0, 9504156578.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1702183.0, 30846963.0, 588778387.0, 11710093635.0, 28968014.0, 9508704718.0]",...,"[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1700474.0, 30778562.0, 586704254.0, 11653681178.0, 28942425.0, 9490320937.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1701390.0, 30811346.0, 587582526.0, 11674662866.0, 28944996.0, 9488388044.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1700720.0, 30791276.0, 587138036.0, 11666027804.0, 28941181.0, 9487734399.0]","[100000.0, 1701701.0, 
30819669.0, 587835743.0, 11683323969.0, 100000.0, 1700161.0, 30774809.0, 586786345.0, 11659947977.0, 28931383.0, 9483680775.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1702950.0, 30876450.0, 589627458.0, 11731959666.0, 28985366.0, 9524631430.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1699489.0, 30750763.0, 586136257.0, 11644157083.0, 28921787.0, 9478022565.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1699909.0, 30766013.0, 586557925.0, 11654698685.0, 28921034.0, 9474680316.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1697791.0, 30702137.0, 585059083.0, 11622294425.0, 28895485.0, 9466539901.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1702031.0, 30839037.0, 588504449.0, 11702117217.0, 28965574.0, 9507519246.0]","[100000.0, 1701701.0, 30819669.0, 587835743.0, 11683323969.0, 100000.0, 1700455.0, 30791117.0, 587345329.0, 11675672057.0, 28934859.0, 9487458189.0]"


ValueError: setting an array element with a sequence.