In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import os
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from pprint import pprint
from utils.dataset import all_datasets, update_config
from utils.utils import load_features, read_lines

In [None]:
def plot_TSNE_parametrized(config):
    """Plot a 2-D t-SNE embedding of per-accent features, coloured by accent.

    For every accent in ``config["all_accents"]`` except ``"us"``, features are
    loaded with ``load_features`` from the lines of
    ``<FULL_DATASET_PATH>/<accent>/selection.json``, subsampled by keeping every
    ``ratio``-th item, pooled, embedded with t-SNE and drawn as a scatter plot.

    Parameters
    ----------
    config : dict
        Required keys: ``"all_accents"``, ``"FULL_DATASET_PATH"``,
        ``"dataset"``, ``"feature_type"``, ``"perp"`` (t-SNE perplexity) and
        ``"iters"`` (t-SNE iteration budget).
        Optional keys: ``"ratio"`` (subsampling stride, default ``1`` = keep
        everything) and ``"random_state"`` (t-SNE seed, default ``None``).

    Raises
    ------
    KeyError
        If a required key is missing from ``config``.
    ValueError
        If no features were collected (no accents other than ``"us"``, or every
        accent produced an empty feature set).
    """
    import inspect

    # "ratio" is optional: several callers only set perp/iters.
    sample_ratio = config.get("ratio", 1)
    perp = config["perp"]
    iters = config["iters"]

    features = []
    labels = []
    for accent in config["all_accents"]:
        if accent == "us":
            continue
        all_JSON_PATH = os.path.join(
            config["FULL_DATASET_PATH"],
            accent,
            "selection.json",
        )
        curr_features = load_features(
            read_lines(all_JSON_PATH),
            config["dataset"],
            config["FULL_DATASET_PATH"],
            config["feature_type"],
        )
        curr_features = curr_features[::sample_ratio]
        features.append(curr_features)
        labels.extend([accent] * len(curr_features))

    if not labels:
        raise ValueError(
            "No features to plot: config['all_accents'] produced no samples "
            "(accent 'us' is always skipped)."
        )

    features = np.concatenate(features, axis=0)

    # scikit-learn 1.5 renamed TSNE's `n_iter` to `max_iter` and later removed
    # the old name; pick whichever keyword the installed version accepts.
    tsne_params = inspect.signature(TSNE.__init__).parameters
    iter_kwarg = "max_iter" if "max_iter" in tsne_params else "n_iter"
    tsne = TSNE(
        n_components=2,
        verbose=0,
        perplexity=perp,
        random_state=config.get("random_state"),
        **{iter_kwarg: iters},
    )
    tsne_results = tsne.fit_transform(features)

    # Only the columns needed for plotting are kept in the frame.
    df = pd.DataFrame(
        {
            "tsne-2d-one": tsne_results[:, 0],
            "tsne-2d-two": tsne_results[:, 1],
            "label": labels,
        }
    )

    fig = plt.figure(figsize=(10, 10))
    _ax = fig.add_subplot(1, 1, 1)

    g = sns.scatterplot(
        x="tsne-2d-one",
        y="tsne-2d-two",
        hue="label",
        palette=sns.color_palette("bright", len(set(labels))),
        data=df,
        legend="full",
        alpha=0.6,
        ax=_ax,
    )
    g.legend(loc="upper right")

    # Set the title before tight_layout so the layout reserves room for it.
    _ax.set_title(
        "TSNE-{}-{}-perp{}-iter{}-ratio{}".format(
            config["dataset"], config["feature_type"], perp, iters, sample_ratio
        )
    )
    fig.tight_layout()
    plt.show()

In [None]:
def plot_TSNE(config, perp=40, iters=1000, random_state=None):
    """Plot a 2-D t-SNE embedding of per-accent features, coloured by accent.

    For every accent in ``config["all_accents"]`` except ``"us"``, features are
    loaded with ``load_features`` from the lines of
    ``<FULL_DATASET_PATH>/<accent>/selection.json``, pooled (no subsampling),
    embedded with t-SNE and drawn as a scatter plot.

    Parameters
    ----------
    config : dict
        Required keys: ``"all_accents"``, ``"FULL_DATASET_PATH"``,
        ``"dataset"`` and ``"feature_type"``.
    perp : int or float, optional
        t-SNE perplexity. Defaults to 40.
    iters : int, optional
        t-SNE iteration budget. Defaults to 1000.
    random_state : int or None, optional
        Seed passed to t-SNE; ``None`` (default) leaves it unseeded.

    Raises
    ------
    KeyError
        If a required key is missing from ``config``.
    ValueError
        If no features were collected (no accents other than ``"us"``, or every
        accent produced an empty feature set).
    """
    import inspect

    features = []
    labels = []
    for accent in config["all_accents"]:
        if accent == "us":
            continue
        all_JSON_PATH = os.path.join(
            config["FULL_DATASET_PATH"],
            accent,
            "selection.json",
        )
        curr_features = load_features(
            read_lines(all_JSON_PATH),
            config["dataset"],
            config["FULL_DATASET_PATH"],
            config["feature_type"],
        )
        features.append(curr_features)
        labels.extend([accent] * len(curr_features))

    if not labels:
        raise ValueError(
            "No features to plot: config['all_accents'] produced no samples "
            "(accent 'us' is always skipped)."
        )

    features = np.concatenate(features, axis=0)

    # scikit-learn 1.5 renamed TSNE's `n_iter` to `max_iter` and later removed
    # the old name; pick whichever keyword the installed version accepts.
    tsne_params = inspect.signature(TSNE.__init__).parameters
    iter_kwarg = "max_iter" if "max_iter" in tsne_params else "n_iter"
    tsne = TSNE(
        n_components=2,
        verbose=1,
        perplexity=perp,
        random_state=random_state,
        **{iter_kwarg: iters},
    )
    tsne_results = tsne.fit_transform(features)

    # Only the columns needed for plotting are kept in the frame.
    df = pd.DataFrame(
        {
            "tsne-2d-one": tsne_results[:, 0],
            "tsne-2d-two": tsne_results[:, 1],
            "label": labels,
        }
    )

    fig = plt.figure(figsize=(10, 10))
    _ax = fig.add_subplot(1, 1, 1)

    g = sns.scatterplot(
        x="tsne-2d-one",
        y="tsne-2d-two",
        hue="label",
        palette=sns.color_palette("bright", len(set(labels))),
        data=df,
        legend="full",
        alpha=0.6,
        ax=_ax,
    )
    g.legend(loc="upper right")

    # Set the title before tight_layout so the layout reserves room for it.
    _ax.set_title("TSNE-{}-{}".format(config["dataset"], config["feature_type"]))
    fig.tight_layout()
    plt.show()




In [None]:
# t-SNE of the MFCC features for the INDIC dataset.
config = update_config(
    dict(dataset="INDIC", server="SWARA", feature_type="MFCC")
)
plot_TSNE(config)

In [None]:
# t-SNE of the MFCC features for the L2 dataset.
config = update_config(
    dict(dataset="L2", server="SWARA", feature_type="MFCC")
)
plot_TSNE(config)

In [None]:
# t-SNE of the MFCC features for the MCV dataset.
config = update_config(
    dict(dataset="MCV", server="SWARA", feature_type="MFCC")
)
plot_TSNE(config)

In [None]:
# t-SNE of the w2v2_Ftill10_768-512-256 features for L2:
# perplexity 40, 1000 iterations, keeping every 3rd sample per accent.
config = update_config(
    dict(
        dataset="L2",
        server="SWARA",
        feature_type="w2v2_Ftill10_768-512-256",
        perp=40,
        iters=1000,
        ratio=3,
    )
)
plot_TSNE_parametrized(config)

In [None]:
# t-SNE of the w2v2_Ftill10_768-512-256 features for L2:
# perplexity 70, 10000 iterations, keeping every 3rd sample per accent.
config = update_config(
    dict(
        dataset="L2",
        server="SWARA",
        feature_type="w2v2_Ftill10_768-512-256",
        perp=70,
        iters=10000,
        ratio=3,
    )
)
plot_TSNE_parametrized(config)

In [None]:
# t-SNE of the w2v2_Ftill10_768-512-256 features for L2:
# perplexity 80, 10000 iterations.
config = {
    "dataset": "L2",
    "server": "SWARA",
    "feature_type": "w2v2_Ftill10_768-512-256",
    "perp": 80,
    "iters": 10000,
    "ratio": 3,  # explicit subsampling stride, matching the other L2 perplexity runs
}
config = update_config(config)
plot_TSNE_parametrized(config)

In [None]:
# t-SNE of the w2v2_Ftill10_768-512-256 features for L2:
# perplexity 90, 10000 iterations.
config = {
    "dataset": "L2",
    "server": "SWARA",
    "feature_type": "w2v2_Ftill10_768-512-256",
    "perp": 90,
    "iters": 10000,
    "ratio": 3,  # explicit subsampling stride, matching the other L2 perplexity runs
}
config = update_config(config)
plot_TSNE_parametrized(config)

In [None]:
# t-SNE of the w2v2_Ftill10_768-512-256 features for the MCV dataset.
config = update_config(
    dict(
        dataset="MCV",
        server="SWARA",
        feature_type="w2v2_Ftill10_768-512-256",
    )
)
plot_TSNE(config)