In [None]:
# !pip install 'relevanceai[vis]'

In [1]:

'''
Available DR and Cluster configuration
'''

from typing import Union, _GenericAlias
from typing_extensions import Literal, get_args

# ==================================================
# =============== DR Args ==========================
# ==================================================

DIM_REDUCTION = Literal["pca", "tsne", "umap", "ivis"]

DIM_REDUCTION_DEFAULT_ARGS = {
    'pca': {
        "svd_solver": "auto",
        "random_state": 42
    },
    'tsne': {
        "init": "pca",
        "n_iter": 500,
        "learning_rate": 100,
        "perplexity": 30,
        "random_state": 42,
    },
    'umap': {
        "n_neighbors": 15,
        "min_dist": 0.1,
        "random_state": 42,
        "transform_seed": 42,
    },
    'ivis': {
        "k": 15, 
        "model": "maaten", 
        "n_epochs_without_progress": 2
    }
}


# ==================================================
# =============== Cluster Args =====================
# ==================================================

# CLUSTER_NUMERIC = Literal["kmeans", "kmedoids",  None]
# CLUSTER_CATEGORICAL = Literal["kmodes",  None]
# CLUSTER_MIXED = Literal["kprotoypes", None]
# CLUSTER = Union[CLUSTER_NUMERIC, CLUSTER_CATEGORICAL, CLUSTER_MIXED]

CLUSTER = Literal["kmeans", "kmedoids", "kmodes", "kprototypes", None]

# for cluster in get_args(CLUSTER):
#     print(type(cluster))
#     if type(cluster)==_GenericAlias:
#         for c in get_args(cluster):
#             print(c)



CLUSTER_DEFAULT_ARGS = {
    'kmeans': {
        "init": "k-means++", 
        "verbose": 1,
        "compute_labels": True,
        "max_no_improvement": 2
    },
    'kmedoids': {
        "metric": "euclidean",
        "init": "k-medoids++",
        "random_state": 42,
        "method": "pam"
    },
    
## Not yet fully implemented
#     'kmodes': {
#         "init": "Huang", 
#         "verbose": 1,
#         "random_state": 42,
#         "n_jobs": -1
#     },
#     'kprototypes': {
#         "init": "Huang", 
#         "verbose": 1,
#         "random_state": 42,
#         "n_jobs": -1
#     }
}


In [2]:
from typing import List


def generate_random_string(N: int = 5) -> str:
    """Generate a random string of letters and numbers
    """
    return ''.join(random.choice(
        string.ascii_uppercase + string.digits) \
            for _ in range(N))


def generate_random_vector(N: int = 512) -> List[float]:
    """Generate a random list of floats
    """
    return [ random.random() for _ in range(N) ]

In [3]:
def sample_vector_doc():
    return [{
        "_id":              uuid.uuid4().__str__(),
        "sample_1_label":   generate_random_string(),
        "sample_2_label":   generate_random_string(),
        "sample_3_label":   generate_random_string(),
        "sample_1_vector_": generate_random_vector(N=100),
        "sample_2_vector_": generate_random_vector(N=100),
        "sample_3_vector_": generate_random_vector(N=100)
    }]

In [11]:
import uuid
import random
import string
test_dataset_id = "_sample_vector_dataset_"
sample_vector_docs = sample_vector_doc() * 100

len(sample_vector_docs)

sample_vector_docs[0]

{'_id': 'cf25df2b-f472-4ff7-998f-3324aee66fff',
 'sample_1_label': 'GRZL3',
 'sample_2_label': 'LQ5CW',
 'sample_3_label': 'OBEL2',
 'sample_1_vector_': [0.5528879272322865,
  0.4363113369363202,
  0.4695526302403531,
  0.322227390917053,
  0.3265858879721969,
  0.04300155118184401,
  0.43680511035521163,
  0.7633960876640848,
  0.9291490294388076,
  0.5914171608875546,
  0.8322088486509049,
  0.39378417223684004,
  0.310153986919547,
  0.12704340537827297,
  0.9811333479425177,
  0.764019883249051,
  0.35692487969253817,
  0.19432561105128332,
  0.13245506997394585,
  0.280231830825854,
  0.34820366949051607,
  0.5847692520336905,
  0.28291551172472085,
  0.6620706122750404,
  0.27823600945514926,
  0.34562837761107335,
  0.846012324574498,
  0.7480740602073309,
  0.9570144521234779,
  0.002265796199246606,
  0.13440342861763266,
  0.13457993366581988,
  0.475606191974139,
  0.05335788515108297,
  0.21222215567518066,
  0.31648102532295996,
  0.9478580484587663,
  0.15775312293100208,

In [8]:

from relevanceai import Client

dataset_id = "_sample_vector_dataset_"
project = "4219e219b6907fd6fbf0"
api_key = "TWhLVU9Yd0JmQldJU2NLNXF5anM6TUNHcGhyd1FTV200RHdHbWRCaV9VUQ"  # Read access
base_url = "https://api-aueast.relevance.ai/v1/"


client = Client(project=project, api_key=api_key, base_url=base_url)

response = client.insert_documents(
    test_dataset_id, sample_vector_docs
)
response

2021-11-17 08:11:54.061 | SUCCESS  | relevanceai.http_client:__init__:54 - Welcome to the RelevanceAI Python SDK
2021-11-17 08:11:56.309 | SUCCESS  | relevanceai.transport:make_http_request:69 - Response success! (https://ingest-api-dev-aueast.relevance.ai/latest/datasets/_sample_vector_dataset_/documents/bulk_insert)


{'inserted': 100, 'failed_documents': [], 'failed_documents_detailed': []}

In [15]:
client.datasets.list()
data = client.datasets.documents.list(dataset_id=dataset_id)
print(len(data))
[d['_id'] for d in data]

2021-11-17 08:15:11.715 | SUCCESS  | relevanceai.transport:make_http_request:69 - Response success! (https://api-aueast.relevance.ai/v1/datasets/list)
2021-11-17 08:15:11.824 | SUCCESS  | relevanceai.transport:make_http_request:69 - Response success! (https://api-aueast.relevance.ai/v1/datasets/_sample_vector_dataset_/documents/list)
3


TypeError: string indices must be integers

In [38]:
%load_ext autoreload
%autoreload 2

from relevanceai import Client

dataset_id = "_sample_vector_dataset_"
project = "4219e219b6907fd6fbf0"
api_key = "TWhLVU9Yd0JmQldJU2NLNXF5anM6TUNHcGhyd1FTV200RHdHbWRCaV9VUQ"  # Read access



# project = "backend-read-only"
# api_key = "UE0taExuc0JoNWc5OTRRSDFMTGY6VVRzRzZmbE9Rd0NkR0MxR3hGMkMzdw"  # Read access

base_url = "https://api-aueast.relevance.ai/v1/"
client = Client(project=project, api_key=api_key, base_url=base_url)

client.datasets.list()
data = client.datasets.documents.list(dataset_id=dataset_id)['documents']
len(data)



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
2021-11-17 06:46:42.071 | SUCCESS  | relevanceai.http_client:__init__:54 - Welcome to the RelevanceAI Python SDK
2021-11-17 06:46:43.218 | SUCCESS  | relevanceai.transport:make_http_request:69 - Response success! (https://api-aueast.relevance.ai/v1/datasets/list)
2021-11-17 06:46:43.309 | SUCCESS  | relevanceai.transport:make_http_request:69 - Response success! (https://api-aueast.relevance.ai/v1/datasets/_sample_vector_dataset_/documents/list)


1

In [18]:
dataset_id = '_sample_vector_dataset_2'

vector_field = 'sample_1_vector_'
client.projector.plot(
    dataset_id=dataset_id, 
    vector_field=vector_field,
    number_of_points_to_render=100,
#     vector_label=vector_label,
#     hover_label=['category']
   dr= 'pca',
    dr_args = {'random_state': 42, 'svd_solver': 'auto'},
    cluster = 'kmeans',
    cluster_args={'compute_labels': True, 'init': 'k-means++', 'max_no_improvement': 2, 'verbose': 1}
)  



2021-11-17 06:37:04.640 | SUCCESS  | relevanceai.transport:make_http_request:69 - Response success! (https://api-aueast.relevance.ai/v1/datasets/_sample_vector_dataset_2/documents/get_where)
2021-11-17 06:37:04.762 | SUCCESS  | relevanceai.transport:make_http_request:69 - Response success! (https://api-aueast.relevance.ai/v1/datasets/_sample_vector_dataset_2/schema)
100


ValueError: n_components=3 must be between 0 and min(n_samples, n_features)=1 with svd_solver='full'

In [None]:
%load_ext autoreload
%autoreload 2

# import sys
# sys.path.append('..')

from relevanceai import Client

dataset_id = "ecommerce-6"
project = "dummy-collections"
api_key = "UzdYRktIY0JxNmlvb1NpOFNsenU6VGdTU0s4UjhUR0NsaDdnQTVwUkpKZw"  # Read access


# dataset_id = "podcasts"
# project = "0a15d88a97a8b390f074"
# api_key = "QnZLM01Yd0JZQTVzdWJmNFdhSHA6TzFLNkNqNXZSNW1GZ1g2Y3JfMTZwQQ"  # Read access


# dataset_id = "_sample_test_dataset"
# project = "4219e219b6907fd6fbf0"
# api_key = "TWhLVU9Yd0JmQldJU2NLNXF5anM6TUNHcGhyd1FTV200RHdHbWRCaV9VUQ"  # Read access


# project = "4219e219b6907fd6fbf0"
# api_key = "TWhLVU9Yd0JmQldJU2NLNXF5anM6TUNHcGhyd1FTV200RHdHbWRCaV9VUQ"  # Read access


base_url = "https://api-aueast.relevance.ai/v1/"
client = Client(project=project, api_key=api_key, base_url=base_url)

'''
Retrieve docs in dataset  set `number_of_points_to_render = None` to retrieve all docs
If `vector_label` None, shows markers only and throws a warning
Default dim reduction is 'pca' with default args above
'''

# vector_label = "product_name"
# vector_field = "product_name_imagetext_vector_"

vector_label = "sample_1_label"
vector_field = "sample_1_vector_"


client.projector.plot(
    dataset_id=dataset_id, 
    vector_field=vector_field,
    number_of_points_to_render=100,
#     vector_label=vector_label,
#     hover_label=['category']
)  



In [None]:

'''
If `vector_label` specified, will show `vector_label` as text
Char length can be varied via `vector_label_char_length`
`hover_label` allows you display extra dataset information on hover
'''

dr = 'umap'

# vector_label = "product_name"
# vector_field = "product_name_imagetext_vector_"


client.projector.plot(
    ### DR Args for vector plot
    dataset_id = dataset_id,
    vector_field = vector_field,
    dr = dr,
    dr_args = DIM_REDUCTION_DEFAULT_ARGS[ dr ],
    number_of_points_to_render=100,
    random_state=42,
    
    ## Plot rendering options
    vector_label = vector_label, 
    vector_label_char_length = 12,
    hover_label = None
)


In [None]:

'''
If `colour_label` specified, will render colour_label as legend
You can set `colour_label_char_length` to shorten displayed legend
'''

dr = 'umap'

# vector_label = "product_name"
# vector_field = "product_name_imagetext_vector_"

fig = client.projector.plot(
    dataset_id = dataset_id,
    vector_field = vector_field,
    number_of_points_to_render=100,
    random_state=42,
    
    ### Dimensionality reduction args
    dr = dr,
    dr_args = DIM_REDUCTION_DEFAULT_ARGS[ dr ],
    
    ## Plot rendering args
    vector_label = None, 
    colour_label = vector_label,
    colour_label_char_length = 20,
#     hover_label = [vector_label, 'category'],
    hover_label = [ vector_label ]
)

fig.data
fig.show()

In [None]:

'''
If `cluster` specified, will override `colour_label` option and render cluster as legend
'''

dr = 'tsne'
cluster = 'kmedoids'

# vector_label = "product_name"
# vector_field = "product_name_imagetext_vector_"


client.projector.plot(
    dataset_id = dataset_id,
    vector_field = vector_field,
    number_of_points_to_render=1000,
    random_state=42,
    
    ### Dimensionality reduction args
    dr = dr,
    dr_args = DIM_REDUCTION_DEFAULT_ARGS[ dr ], 

    ## Plot rendering args
#     vector_label = 'category', 
#     colour_label = 'product_name',
    vector_label = 'title', 
    colour_label = 'creator',
    hover_label = None,
    
    ### Cluster args
    cluster = cluster,
    cluster_args = CLUSTER_DEFAULT_ARGS[ cluster ],
    num_clusters = 10
)

In [None]:
import os
from relevanceai import Client 
# client = Client()

project = "jacky-wong-charlene"
api_key = "S3M4eUpuMEJZQTVzdWJmNHhUdUE6QndsbVhvZ1NTVDZHeFBvbjNEWTYxZw"

base_url = "https://api-aueast.relevance.ai/v1/"

client = Client(project=project, api_key=api_key, base_url=base_url)



client.projector.plot(
    dataset_id="research2vec", 
    vector_field="summary_sentence_transformers_vector_",
#     colour_label="primary_category",
    vector_label="title",
    number_of_points_to_render=100,
    vector_label_char_length= 50,
    dr='pca', # was unable to get ivis working
    dr_args={},
)