In [None]:
# Uncomment to install the SDK together with its `vis` extra
# (presumably required by the projector plots below - confirm against the package docs).
# %pip install 'relevanceai[vis]'

In [1]:

'''
Available DR and Cluster configuration
'''
from typing import Union
from typing_extensions import Literal

# ==================================================
# =============== DR Args ==========================
# ==================================================

# Names of the dimensionality-reduction algorithms configured in this notebook.
DIM_REDUCTION = Literal["pca", "tsne", "umap", "ivis"]

# Default keyword arguments for each algorithm, keyed by the names above.
# The cells below pass `DIM_REDUCTION_DEFAULT_ARGS[dr]` as `dr_args`.
DIM_REDUCTION_DEFAULT_ARGS = dict(
    pca=dict(
        svd_solver="auto",
        random_state=42,
    ),
    tsne=dict(
        init="pca",
        n_iter=500,
        learning_rate=100,
        perplexity=30,
        random_state=42,
    ),
    umap=dict(
        n_neighbors=15,
        min_dist=0.1,
        random_state=42,
        transform_seed=42,
    ),
    ivis=dict(
        k=15,
        model="maaten",
        n_epochs_without_progress=2,
    ),
)


# ==================================================
# =============== Cluster Args =====================
# ==================================================

# Clustering algorithm names, grouped by the kind of data they are meant for.
# `None` is a member of every alias (presumably meaning "no clustering" - confirm
# against the plotting API).
CLUSTER_NUMERIC = Literal["kmeans", "kmedoids", None]
CLUSTER_CATEGORICAL = Literal["kmodes", None]
# Spelling fixed: this was "kprotoypes", which did not match the "kprototypes"
# key planned for CLUSTER_DEFAULT_ARGS below.
CLUSTER_MIXED = Literal["kprototypes", None]
CLUSTER = Union[CLUSTER_NUMERIC, CLUSTER_CATEGORICAL, CLUSTER_MIXED]

# Default keyword arguments for each clustering algorithm, keyed by algorithm name.
# Only the numeric algorithms have defaults so far.
CLUSTER_DEFAULT_ARGS = {
    'kmeans': {
        "init": "k-means++",
        "verbose": 1,
        "compute_labels": True,
        "max_no_improvement": 2
    },
    'kmedoids': {
        "metric": "euclidean",
        "init": "k-medoids++",
        "random_state": 42,
        "method": "pam"
    },

    ## Not yet fully implemented
    #     'kmodes': {
    #         "init": "Huang",
    #         "verbose": 1,
    #         "random_state": 42,
    #         "n_jobs": -1
    #     },
    #     'kprototypes': {
    #         "init": "Huang",
    #         "verbose": 1,
    #         "random_state": 42,
    #         "n_jobs": -1
    #     }
}


In [3]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Put the parent directory on the import path (presumably so the development
# checkout of `relevanceai` is picked up - confirm). Guarded so that re-running
# this cell does not keep appending duplicate entries to sys.path.
if '..' not in sys.path:
    sys.path.append('..')

from relevanceai.http_client import Client

dataset_id = "ecommerce-6"
project = "dummy-collections"
# Credentials should come from the environment, not from the notebook source.
# RELEVANCEAI_API_KEY is a name chosen for this notebook; when it is unset we
# fall back to the demo key below so the notebook still runs as before.
api_key = os.environ.get(
    "RELEVANCEAI_API_KEY",
    "UzdYRktIY0JxNmlvb1NpOFNsenU6VGdTU0s4UjhUR0NsaDdnQTVwUkpKZw",  # Read access
)
base_url = "https://api-aueast.relevance.ai/v1/"

# Shared client used by every plotting cell below.
client = Client(project=project, api_key=api_key, base_url=base_url)

'''
Retrieve docs from the dataset and plot them.
Set `number_of_points_to_render = None` to retrieve all docs.
If `vector_label` is None, only markers are shown and a warning is raised.
The default dimensionality reduction is 'pca', with the default args defined above.
'''

vector_field = "product_name_imagetext_vector_"
vector_label = "product_name"  # not passed to plot() in this cell

client.projector.plot(
    dataset_id=dataset_id,
    vector_field=vector_field,
    number_of_points_to_render=1000,
)



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
2021-11-08 08:11:09.649 | SUCCESS  | relevanceai.http_client:__init__:43 - Welcome to the development version of the relevanceai Python SDK



The vector label has not been specified.



2021-11-08 08:11:20.266 | SUCCESS  | relevanceai.transport:make_http_request:63 - Response success! (https://api-aueast.relevance.ai/v1/datasets/ecommerce-6/documents/list)
2021-11-08 08:11:40.455 | SUCCESS  | relevanceai.transport:make_http_request:63 - Response success! (https://api-aueast.relevance.ai/v1/datasets/ecommerce-6/documents/list)
2021-11-08 08:11:51.549 | SUCCESS  | relevanceai.transport:make_http_request:63 - Response success! (https://api-aueast.relevance.ai/v1/datasets/ecommerce-6/schema)


In [10]:

'''
When `vector_label` is given, that field is shown as text on the plot.
The displayed label length can be changed via `vector_label_char_length`.
`hover_label` lets you display extra dataset information on hover.
'''

vector_field = "product_name_imagetext_vector_"
vector_label = "product_name"

dr = 'umap'
cluster = 'kmedoids'  # assigned here but not passed to plot() in this cell

client.projector.plot(
    # Data selection and dimensionality reduction
    dataset_id=dataset_id,
    vector_field=vector_field,
    dr=dr,
    dr_args=DIM_REDUCTION_DEFAULT_ARGS[dr],
    number_of_points_to_render=1000,
    # Plot rendering options
    vector_label=vector_label,
    vector_label_char_length=10,
    hover_label=[vector_label, 'category'],
)



2021-11-08 08:28:08.881 | SUCCESS  | relevanceai.transport:make_http_request:63 - Response success! (https://api-aueast.relevance.ai/v1/datasets/ecommerce-6/documents/list)
2021-11-08 08:28:29.866 | SUCCESS  | relevanceai.transport:make_http_request:63 - Response success! (https://api-aueast.relevance.ai/v1/datasets/ecommerce-6/documents/list)
2021-11-08 08:28:38.795 | SUCCESS  | relevanceai.transport:make_http_request:63 - Response success! (https://api-aueast.relevance.ai/v1/datasets/ecommerce-6/schema)


In [29]:

'''
When `colour_label` is given, that field is rendered as the legend.
Use `colour_label_char_length` to shorten the displayed legend entries.
'''

vector_field = "product_name_imagetext_vector_"
vector_label = "product_name"

dr = 'umap'
cluster = 'kmedoids'  # assigned here but not passed to plot() in this cell

client.projector.plot(
    dataset_id=dataset_id,
    vector_field=vector_field,
    number_of_points_to_render=100,
    # Dimensionality reduction
    dr=dr,
    dr_args=DIM_REDUCTION_DEFAULT_ARGS[dr],
    # Plot rendering: no text labels, colour by the label field instead
    vector_label=None,
    colour_label=vector_label,
    colour_label_char_length=20,
    hover_label=[vector_label, 'category'],
)

2021-11-08 09:14:35.390 | SUCCESS  | relevanceai.transport:make_http_request:63 - Response success! (https://api-aueast.relevance.ai/v1/datasets/ecommerce-6/documents/list)
2021-11-08 09:14:38.469 | SUCCESS  | relevanceai.transport:make_http_request:63 - Response success! (https://api-aueast.relevance.ai/v1/datasets/ecommerce-6/documents/list)
2021-11-08 09:14:39.381 | SUCCESS  | relevanceai.transport:make_http_request:63 - Response success! (https://api-aueast.relevance.ai/v1/datasets/ecommerce-6/schema)


In [6]:

'''
When `cluster` is given, it overrides the `colour_label` option and the clusters are rendered as the legend.
'''

vector_field = "product_name_imagetext_vector_"
vector_label = "product_name"

dr = 'tsne'
cluster = 'kmedoids'

client.projector.plot(
    dataset_id=dataset_id,
    vector_field=vector_field,
    number_of_points_to_render=100,
    # Dimensionality reduction
    dr=dr,
    dr_args=DIM_REDUCTION_DEFAULT_ARGS[dr],
    # Plot rendering
    vector_label=None,
    colour_label=vector_label,
    hover_label=[vector_label, 'category'],
    # Clustering: 10 clusters plus the algorithm's default args
    cluster=cluster,
    cluster_args={"n_clusters": 10, **CLUSTER_DEFAULT_ARGS[cluster]},
)


The vector label has not been specified.



2021-11-08 08:14:28.664 | SUCCESS  | relevanceai.transport:make_http_request:63 - Response success! (https://api-aueast.relevance.ai/v1/datasets/ecommerce-6/documents/list)
2021-11-08 08:14:30.639 | SUCCESS  | relevanceai.transport:make_http_request:63 - Response success! (https://api-aueast.relevance.ai/v1/datasets/ecommerce-6/documents/list)
2021-11-08 08:14:31.659 | SUCCESS  | relevanceai.transport:make_http_request:63 - Response success! (https://api-aueast.relevance.ai/v1/datasets/ecommerce-6/schema)



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.

