In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import plotly.express as px

from paths import *
from ssd_paths import *

## Local path define

In [77]:
mapper_path = ssd_src_path + "mapper/mandarin_to_feats.xlsx"
# rec_name = "dim-3-1130161723"
# rec_name = "dim-3-1130205041"
# rec_name = "dim-3-1130222703"
rec_name = "dim-3-1130225525"
result_dir = root_path + "results/man/" + f"{rec_name}/"
mk(result_dir)

model_save_dir = model_man_save_dir + f"{rec_name}/"

## Define util functions

In [78]:
def get_data_pair(selected_guide, hiddens): 
    return hiddens[selected_guide.index.tolist()], selected_guide["covertag"].tolist()

def get_random_samples(guide, to_choose_tag, cover_tag, max_samples=500): 
    selected_guide = guide[guide["tag"] == to_choose_tag]
    count = len(selected_guide)
    if len(selected_guide) > max_samples: 
        selected_guide = selected_guide.sample(max_samples)
        count = max_samples
    selected_guide["covertag"] = cover_tag
    return selected_guide, count

def combine_guide(guides): 
    return pd.concat(guides)

def plot3dGroup(guide):
    config = {
    'toImageButtonOptions': {
        'format': 'png', # one of png, svg, jpeg, webp
        'filename': 'custom_image',
        'height': 1280,
        'width': 1280,
        'scale': 1 # Multiply title/legend/axis/canvas sizes by this factor
    }
    }
    fig = px.scatter_3d(guide, x="norm_0", y="norm_1", z="norm_2",
                color='tag')
    fig.update_traces(marker=dict(size=2),
                    selector=dict(mode='markers'))
    fig.update_layout(
        scene = dict(
            xaxis = dict(nticks=8, range=[-1,1],),
                        yaxis = dict(nticks=8, range=[-1,1],),
                        zaxis = dict(nticks=8, range=[-1,1],),),)
    fig.update_layout(legend= {'itemsizing': 'constant'})
    fig.update_layout(legend_title_text='Class')
    fig.update_layout(
        legend=dict(
            x=0,
            y=1,
            title_font_family="Times New Roman",
            font=dict(
                family="Times New Roman",
                size=36,
                color="black"
            ),
            # bgcolor="LightSteelBlue",
            bordercolor="Black",
            borderwidth=1
        )
    )
    fig.update_layout(
        margin=dict(l=0, r=0, t=0, b=0),
    )
    camera = dict(
        eye=dict(x=0., y=0., z=2.5)
    )
    fig.update_layout(scene_camera=camera)
    html_plot = fig.to_html(full_html=False, config=config)
    # fig.show(config=config)
    return html_plot

def minmax(val, themin, themax, a=-1, b=1):
    return (b - a) * ((val - themin) / (themax - themin)) + a

## Load data

In [79]:
heim_hiddens = np.load(os.path.join(model_save_dir, "heim_hiddens.npy"))
heim_df = pd.read_csv(os.path.join(model_save_dir, "heim_encodes.csv"))

In [80]:
heim_df["tag"].unique()

array(['iang', 'z', 'ii', 'l', 'ian', 'x', 'v', 'j', 'iou', 'sh', 'en',
       'm', 'e', 'n', 'k', 'r', 'ei', 'd', 'ou', 'i', 'g', 'an', 'zh',
       'iii', 'uo', 'u', 'b', 'a', 't', 'ang', 'ch', 'eng', 'ui', 'ai',
       'ao', 'ie', 'uang', 'ing', 's', 'rr', 'iao', 'f', 'p', 'q', 've',
       'uan', 'h', 'in', 'un', 'ong', 'ia', 'van', 'c', 'ua', 'iong',
       'uai', 'vn', 'o'], dtype=object)

In [81]:
themin = heim_df["0"].min()
themax = heim_df["0"].max()
heim_df["norm_0"] = heim_df["0"].apply(lambda x: minmax(x, themin, themax))

themin = heim_df["1"].min()
themax = heim_df["1"].max()
heim_df["norm_1"] = heim_df["1"].apply(lambda x: minmax(x, themin, themax))

themin = heim_df["2"].min()
themax = heim_df["2"].max()
heim_df["norm_2"] = heim_df["2"].apply(lambda x: minmax(x, themin, themax))

## Draw vowels

In [82]:
def proc_draw(guide, tags, max): 
    selected_guides = []
    num_counts = []
    for tag in tags: 
        selected_guide, count = get_random_samples(guide, tag, tag, max_samples=max)
        selected_guides.append(selected_guide)
        num_counts.append(count)

    combined_guide = combine_guide(selected_guides)
    outhtml = plot3dGroup(combined_guide)

    filename = "-".join(tags)
    outname = os.path.join(result_dir, f"{filename}.html")
    with open(outname, "w") as f:
        f.write('<meta charset="UTF-8">')
        f.write("<h3>Phones: {}</h3>".format(", ".join(tags)))
        # f.write("<h3>IPA: {}</h3>".format(", ".join(these_ipas)))
        f.write("<h3>Counts: {}</h3>".format(", ".join(map(str, num_counts))))
        f.write("<hr>")
        # f.write(outtable)
        f.write("<hr>")
        f.write(outhtml)

In [83]:
proc_draw(heim_df, ["sh", "r"], 3000)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [84]:
proc_draw(heim_df, ["a", "e", "i", "uo", "u", "v"], 1500)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [85]:
proc_draw(heim_df, ["d", "t"], 3000)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [86]:
proc_draw(heim_df, ["a", "e", "rr", "i", "u", "v"], 1500)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [87]:
proc_draw(heim_df, ["i", "ii", "iii"], 1500)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [88]:
proc_draw(heim_df, ["p", "t", "k"], 3000)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [89]:
# vowels = ["a", "e", "i", "uo", "u", "v"]
# selected_guides = []
# num_counts = []
# for vowel in vowels: 
#     selected_guide, count = get_random_samples(heim_df, vowel, vowel,max_samples=1000)
#     selected_guides.append(selected_guide)
#     num_counts.append(count)

# combined_guide = combine_guide(selected_guides)
# # selected_hiddens, selected_tags = get_data_pair(combined_guide, heim_hiddens)

# outhtml = plot3dGroup(combined_guide)

# outname = os.path.join(result_dir, "vowels.html")
# with open(outname, "w") as f:
#     f.write('<meta charset="UTF-8">')
#     # f.write("<h3>Phones: {}</h3>".format(", ".join(these)))
#     # f.write("<h3>IPA: {}</h3>".format(", ".join(these_ipas)))
#     # f.write("<h3>Counts: {}</h3>".format(", ".join(map(str, nums))))
#     f.write("<hr>")
#     # f.write(outtable)
#     f.write("<hr>")
#     f.write(outhtml)