In [1]:
import numpy as np
from thirdai import bolt

In [2]:
model = bolt.UniversalDeepTransformer(
    data_types = {
        "QUERY" : bolt.types.text(),
        "DOC_ID" : bolt.types.categorical(delimiter = ":")
    },
    target = "DOC_ID", 
    n_target_classes = 10000,
    integer_target=True,
    options = {
        "extreme_output_dim" : 1000,
        "fhr" : 100,
        "extreme_num_hashes" : 4,
        "embedding_dimension" : 2048,
        "extreme_classification" : True
    }
)
index = model.get_index()

In [5]:
ind = np.array([[1,2],[3,4]])
val = np.array([[1,2],[3,4]])

In [6]:
tensor = bolt.nn.Tensor(indices = ind, values = val, dense_dim = 10)

In [10]:
tensor[0]

[(1, 1), (2, 2)]

In [21]:
def make_sparse_tensor_from_numpy(np_array : np.ndarray):
    indices = [np.arange(np_array.shape[1]).tolist()]*np_array.shape[0]
    values = [np_array[i, :].tolist() for i in range(np_array.shape[0])]
    return bolt.nn.Tensor(indices = indices, values = values, dense_dim = 100)

In [22]:
array = np.arange(1000).reshape(10, 100)
make_sparse_tensor_from_numpy(array)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]


ValueError: Cannot specify indices for a dense tensor.

In [5]:
def make_output_dataset(labels : list, output_dim : int, mach_index, label_dim = 4294967295):

    def make_sparse_tensor(labels, output_dim):
        return bolt.nn.Tensor(
            indices = labels, 
            values = [[1]*len(x) for x in labels], 
            dense_dim = output_dim
            )

    output_tensor = make_sparse_tensor(labels, output_dim)

    def transform_label_to_mach_label(mach_index, labels):
        mach_labels = []
        for label in labels:
            new_labels = []
            for l in label:
                new_labels+=mach_index.get_entity_hashes(l)
            mach_labels.append(new_labels)
        return mach_labels
    
    mach_labels = transform_label_to_mach_label(mach_index, labels)

    mach_label_tensor = make_sparse_tensor(labels = mach_labels, output_dim = label_dim)

    return make_input_dataset([output_tensor, mach_label_tensor])

In [6]:
input1 = np.arange(1000).reshape(10, 100)
input2 = np.arange(2000).reshape(10, 200)
tensor1 = bolt.nn.Tensor(values = input1, with_grad = False)
tensor2 = bolt.nn.Tensor(values = input2, with_grad = False)

labels = [[i] for i in range(tensor1.activations.shape[0])]

input_dataset = make_input_dataset([tensor1])
output_dataset = make_output_dataset(labels = labels, output_dim = 1000, mach_index = index)

In [7]:
print(output_dataset[0][0].active_neurons[0])
print(output_dataset[0][1].active_neurons[0])
print(input_dataset[0][0].activations[0])

[0]
[265 311 926 563]
[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17.
 18. 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35.
 36. 37. 38. 39. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50. 51. 52. 53.
 54. 55. 56. 57. 58. 59. 60. 61. 62. 63. 64. 65. 66. 67. 68. 69. 70. 71.
 72. 73. 74. 75. 76. 77. 78. 79. 80. 81. 82. 83. 84. 85. 86. 87. 88. 89.
 90. 91. 92. 93. 94. 95. 96. 97. 98. 99.]


In [8]:
input_dataset.active_neurons

AttributeError: 'list' object has no attribute 'active_neurons'

In [None]:
model.train_on_tensors(
    input = input_dataset,
    output = output_dataset
)

: 

In [40]:
data = tuple([input_dataset, output_dataset])