In [1]:
import numpy as np
import gc
import timeit
import torch 

# Concat large arrays 

In [2]:
real_shape = (206928225, 100)
testshape = (int(206928225/100), 100)
testshape
test = True
no_tries = 3

In [3]:
if test:
    shape = testshape
else:
    shape = real_shape

In [4]:
def fast_concat(se,pe,oe):
    
    assert se.shape == pe.shape, "Error! fast_concat with differing shapes"
    assert se.shape == oe.shape, "Error! fast_concat with differing shapes"
       
    x = np.empty((se.shape[0],se.shape[1]*3),dtype=np.float32)
    x[:,0:100] =se
    x[:,100:200] = pe
    x[:,200:] = oe
    
    return x

def insert_row_into_array_np(arr,row,col_ix):
    col = np.repeat(row,len(arr)).reshape((len(arr),col_ix[1]-col_ix[0]))
    arr[:,col_ix[0]:col_ix[1]] = col
    return arr

def insert_row_into_array_zero_np(arr,row,col_ix):
    col = np.repeat(row,len(arr)).reshape((len(arr),col_ix[1]-col_ix[0]))
    arr[:,col_ix[0]:col_ix[1]] = 0
    arr[:,col_ix[0]:col_ix[1]] += col
    
    return arr

def insert_row_into_array_loop_np(arr,row,col_ix):
    for i in range(len(arr)):
        arr[:,col_ix[0]:col_ix[1]][i] = row
    return arr

def insert_row_into_array_loop_zero_np(arr,row,col_ix):
    arr[:,col_ix[0]:col_ix[1]] = 0
    for i in range(len(arr)):
        arr[:,col_ix[0]:col_ix[1]][i] += row
    return arr

In [5]:
def insert_row_into_array_torch(arr,row,col_ix):
    col = row.repeat(len(arr),1)
    arr[:,col_ix[0]:col_ix[1]] = col
    return arr

def insert_row_into_array_zero_torch(arr,row,col_ix):
    col = row.repeat(len(arr),1)
    arr[:,col_ix[0]:col_ix[1]] = 0
    arr[:,col_ix[0]:col_ix[1]] += col
    return arr

def insert_row_into_array_loop_torch(arr,row,col_ix):
    for i in range(len(arr)):
        arr[:,col_ix[0]:col_ix[1]][i] = row
    return arr

def insert_row_into_array_loop_zero_torch(arr,row,col_ix):
    arr[:,col_ix[0]:col_ix[1]] = 0
    for i in range(len(arr)):
        arr[:,col_ix[0]:col_ix[1]][i] += row
    return arr

In [6]:
testrow = np.zeros(shape[1])
testarray = np.empty((shape[0],shape[1]*3))

try_np = f"insert_row_into_array_np(testarray,testrow,(100,200));gc.collect()"
try_np_zero= f"insert_row_into_array_zero_np(testarray,testrow,(100,200));gc.collect()"
try_np_loop= f"insert_row_into_array_loop_np(testarray,testrow,(100,200));gc.collect()"
try_np_loop_zero = f"insert_row_into_array_loop_zero_np(testarray,testrow,(100,200));gc.collect()"

## Time numpy functions

In [7]:
timeit.timeit(try_np, number=no_tries,globals=globals())

KeyboardInterrupt: 

In [None]:
timeit.timeit(try_np_zero, number=no_tries,globals=globals())

In [None]:
timeit.timeit(try_np_loop, number=no_tries,globals=globals())

In [None]:
timeit.timeit(try_np_loop_zero, number=no_tries,globals=globals())

## Try torch functions

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
testrow = torch.tensor(testrow).to(device)
testarray = torch.tensor(testarray).to(device)

try_torch = f"insert_row_into_array_torch(testarray,testrow,(100,200));gc.collect()"
try_torch_zero= f"insert_row_into_array_zero_torch(testarray,testrow,(100,200));gc.collect()"
try_torch_loop= f"insert_row_into_array_loop_torch(testarray,testrow,(100,200));gc.collect()"
try_torch_loop_zero= f"insert_row_into_array_loop_zero_torch(testarray,testrow,(100,200));gc.collect()"

In [None]:
timeit.timeit(try_torch, number=no_tries,globals=globals())

In [None]:
timeit.timeit(try_torch_zero, number=no_tries,globals=globals())

In [None]:
timeit.timeit(try_torch_loop, number=no_tries,globals=globals())

In [None]:
timeit.timeit(try_torch_loop_zero, number=no_tries,globals=globals())

In [None]:
# pytorch model
import torchmetrics
import torch
from torch import nn 


class ClassifierSimple(torch.nn.Module):
    def __init__(self,input_dim=300,hidden_size=64):
        super(ClassifierSimple, self).__init__()
        
        
        self.layers = nn.Sequential(
                # flatten input if necessary
                nn.Flatten(),
                nn.Linear(input_dim,hidden_size),
                nn.ReLU(),
                nn.Linear(hidden_size,1)
        )
        
        self.output_activation = nn.Sigmoid()
                
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
        
    
    def forward(self,x):        
        
        return self.layers(x)
    def predict(self,x):
        x.to(self.device)
        
        return self.output_activation(self.layers(x))
    def predict_numpy(self,x):
        x = torch.tensor(x)
        x.to(self.device)
        return self.output_activation(self.layers(x)).detach().cpu().numpy()
        
    


# Test Prediction function

In [None]:
model = ClassifierSimple()
model = model.to(device)

In [None]:
test = torch.empty((shape[0],shape[1]*3)).to(device)

In [None]:
timeit.timeit(f"model(test);gc.collect()", number=10,globals=globals())

In [None]:
timeit.timeit(f"model.predict_numpy(test);gc.collect()", number=10,globals=globals())

No Difference on cpu ... need to check on gpu

In [1]:
import torch

In [2]:
x = torch.zeros(10000,300)

In [3]:
row = torch.ones(100).reshape(1,100)

In [9]:
torch.arange(10)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [10]:
x[:,100:200].index_fill(0,torch.arange(len(x)),row)

RuntimeError: index_fill_ only supports a 0-dimensional value tensor, but got tensor with 2 dimension(s).

# Test to(device)

In [104]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [13]:
import torch

In [15]:
edge_ix = [2,3,6,7]

In [14]:
x = torch.empty(10)

In [3]:
x = torch.rand(15000**2)/10

In [4]:
len(x)

225000000

In [12]:
def compute_rank(scores,ix,mask=None):
    if mask == None:    
        optimistic_rank =(scores > scores[ix]).sum()+1
        pessimistic_rank = (scores >= scores[ix]).sum()

        
    else:
        
        optimistic_rank = ((scores > scores[ix]).index_fill(0,mask,False)).sum()+1
        pessimistic_rank = ((scores >= scores[ix]).index_fill(0,mask,False)).sum()
        
    rank = (optimistic_rank+pessimistic_rank)*0.5
        
    return rank

In [6]:
def test_for_loop(no_nodes):
    for ix in range(no_nodes):
        compute_rank(x,ix)

In [31]:
from sklearn.utils.extmath import cartesian
no_entities = 10

In [32]:
s_o_combs = torch.tensor(cartesian((range(no_entities),range(no_entities))))

In [42]:
s_o_combs[s_o_combs[:,1] == 4]

tensor([[0, 4],
        [1, 4],
        [2, 4],
        [3, 4],
        [4, 4],
        [5, 4],
        [6, 4],
        [7, 4],
        [8, 4],
        [9, 4]])

In [37]:
edge = torch.tensor((8,3))

In [24]:
def get_head_ix(edge):
    return edge[1]
    

In [39]:
(s_o_combs[s_o_combs[:,0] == 8]) == edge

tensor([[ True, False],
        [ True, False],
        [ True, False],
        [ True,  True],
        [ True, False],
        [ True, False],
        [ True, False],
        [ True, False],
        [ True, False],
        [ True, False]])

In [43]:
import torch

In [55]:
x = torch.rand(100,2)
x = torch.vstack((x,x))
torch.unique(x[:,0])

tensor([0.0254, 0.0369, 0.0385, 0.0559, 0.0590, 0.0802, 0.0820, 0.0830, 0.1129,
        0.1385, 0.1404, 0.1464, 0.1597, 0.1666, 0.1749, 0.1787, 0.1849, 0.1860,
        0.1864, 0.1894, 0.1926, 0.1936, 0.2087, 0.2259, 0.2362, 0.2431, 0.2432,
        0.2510, 0.2562, 0.2628, 0.2636, 0.2861, 0.3117, 0.3150, 0.3167, 0.3234,
        0.3400, 0.3578, 0.3680, 0.3685, 0.3845, 0.4022, 0.4223, 0.4458, 0.4459,
        0.4466, 0.4490, 0.4693, 0.4786, 0.4813, 0.4950, 0.5070, 0.5315, 0.5491,
        0.5513, 0.5782, 0.5888, 0.5912, 0.5975, 0.6226, 0.6295, 0.6357, 0.6359,
        0.6633, 0.6647, 0.6673, 0.6688, 0.6693, 0.6720, 0.6769, 0.6776, 0.6794,
        0.6850, 0.6957, 0.7136, 0.7474, 0.7594, 0.7893, 0.7931, 0.7945, 0.7974,
        0.8010, 0.8085, 0.8261, 0.8273, 0.8337, 0.8354, 0.8644, 0.8739, 0.8765,
        0.8768, 0.8990, 0.9069, 0.9142, 0.9369, 0.9434, 0.9724, 0.9769, 0.9814,
        0.9963])

In [12]:
test_for_loop()

In [9]:
import timeit
no_nodes = 2500

In [10]:
timeit.timeit(f'test_for_loop({no_nodes})', number=1,globals=globals())

KeyboardInterrupt: 

In [28]:
from scipy.stats import rankdata

In [None]:
timeit.timeit('rankdata(x)',number=1,globals=globals())