# Predict Othello

## Import Libs

In [23]:
import pandas as pd
from paths import *
import os
import numpy as np

## Construct mapper

In [10]:
def read_csv_and_map(filename):
    """Load a feature table from CSV and index its rows by identifier.

    The file is parsed by ``pd.read_csv`` with its default settings (so it is
    read as comma-separated). Every row adds one entry keyed by that row's
    ``hs_font`` value; the entry is the list of the row's values from the
    third column onward. The names of those same columns are stored under
    the extra key ``"feature_names"``.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        dict: ``hs_font`` value -> list of feature values, plus
        ``"feature_names"`` -> list of feature column names. When an
        ``hs_font`` value repeats, the later row replaces the earlier one;
        a row whose ``hs_font`` is literally ``"feature_names"`` is
        replaced by the column-name list.
    """
    table = pd.read_csv(filename)

    # The first two columns are skipped by position; the rest are features.
    mapping = {
        record['hs_font']: record.iloc[2:].values.tolist()
        for _, record in table.iterrows()
    }

    mapping["feature_names"] = list(table.columns[2:])
    return mapping

In [11]:
# Build the identifier -> feature-list lookup that the scoring cells index
# with each row's "target" and "predicted" values.
# NOTE(review): pred_results_dir is not defined in this notebook; presumably
# it is supplied by `from paths import *` -- confirm.
csv_filename = os.path.join(pred_results_dir, 'handshapefeatures_codepoints.csv')
identifier_mapping = read_csv_and_map(csv_filename)

In [49]:
identifier_mapping

{'0': [0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0],
 '1': [1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0],
 '2': [1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0],
 '3': [1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1],
 '4': [1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0],
 '5': [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
 '6': [1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1],
 '7': [1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1],
 '8': [1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1],
 '9': [1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1],
 '-': [0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0],
 '(': [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0],
 ')': [0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0],
 ',': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 '/': [0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1,

## Define difference measurement methods

In [127]:
from sklearn.metrics import pairwise_distances

def all_same_edit(a, b):
    """Return the city-block (L1) distance between two feature vectors.

    Every position carries the same weight: the result is the sum of the
    absolute element-wise differences, which for 0/1 features equals the
    number of positions where ``a`` and ``b`` differ.

    The distance is computed directly with NumPy instead of building a 1x1
    matrix through ``sklearn.metrics.pairwise_distances``; this function is
    applied once per DataFrame row, so the lighter path matters.

    Args:
        a: 1-D sequence (list or array) of numeric feature values.
        b: 1-D sequence with the same length as ``a``.

    Returns:
        float: the summed absolute difference.

    Raises:
        ValueError: if the inputs are not 1-D, differ in length, cannot be
            converted to numbers, or contain NaN/infinite values.
    """
    vec_a = np.asarray(a, dtype=float)
    vec_b = np.asarray(b, dtype=float)

    # Check shapes explicitly: NumPy broadcasting would otherwise silently
    # return a distance for, e.g., a length-1 vector against a length-20 one.
    if vec_a.ndim != 1 or vec_a.shape != vec_b.shape:
        raise ValueError(
            "all_same_edit expects two 1-D vectors of equal length, got shapes "
            f"{vec_a.shape} and {vec_b.shape}"
        )

    # Keep rejecting non-finite input rather than returning NaN/inf.
    if not (np.isfinite(vec_a).all() and np.isfinite(vec_b).all()):
        raise ValueError("all_same_edit inputs must not contain NaN or infinity")

    return float(np.abs(vec_a - vec_b).sum())

def fingerless_edit(a, b):
    """Distance in which a difference shared by several fingers is not multiplied.

    Both 20-element inputs are arranged as a 5x4 grid, one row per digit and
    one column per feature position. Rows 0-3 are treated as the four
    fingers and row 4 as the thumb. For each column, the finger
    contribution is the fraction of the four finger rows that differ; the
    thumb contribution is 1 if the thumb row differs in that column and 0
    otherwise. The column totals are summed without further averaging, so
    the largest possible value is 8 (all-zeros vs. all-ones).

    Rationale (Arthur's note): the between-finger calculation seems the most
    faithful way to quantify handshape differences. For example "j", "k" and
    "l" look hierarchically related: "j" vs "k" differ by 1 at k2 (knuckle 2)
    for ring, middle and index; "j" vs "l" differ by 1 at splayed for ring,
    middle and index; "k" vs "l" differ by 1 at k2 and 1 at splayed for
    ring, middle and index.

    Args:
        a: Sequence of 20 feature values (list or array).
        b: Sequence of 20 feature values (list or array).

    Returns:
        NumPy float: the summed per-column distance.
    """
    grid_a = np.array(a).reshape(5, 4)
    grid_b = np.array(b).reshape(5, 4)

    # True wherever the two handshapes disagree.
    mismatch = grid_a != grid_b

    # Fingers: share of differing fingers per column. Thumb: counted in full.
    finger_share = mismatch[:4].mean(axis=0)
    thumb_hits = mismatch[4]

    return (finger_share + thumb_hits).sum()

def knuckleless_edit(a, b):
    """Distance in which the two knuckle features of a digit count as one.

    Both 20-element inputs are arranged as a 5x4 grid, one row per digit.
    For every row, the first two columns (the knuckle features) contribute
    the mean of their two mismatch flags (0, 0.5 or 1), while the last two
    columns each contribute a full 0 or 1. Rows are not averaged against
    each other, so the largest possible value is 15 (all-zeros vs.
    all-ones).

    Author's note: this measure may be less faithful than the fingerless
    one; it is meant to compare straight versus curled fingers without
    weighting which kind of curling is involved.

    Args:
        a: Sequence of 20 feature values (list or array).
        b: Sequence of 20 feature values (list or array).

    Returns:
        NumPy float: the summed per-digit distance.
    """
    grid_a = np.array(a).reshape(5, 4)
    grid_b = np.array(b).reshape(5, 4)

    # True wherever the two handshapes disagree.
    mismatch = grid_a != grid_b

    # Collapse the two knuckle columns into one averaged column per digit,
    # then put the remaining two columns back beside it.
    knuckle_share = mismatch[:, :2].mean(axis=1, keepdims=True)
    per_digit = np.concatenate((knuckle_share, mismatch[:, 2:]), axis=1)

    return per_digit.sum()

def fingerless_knuckleless_edit(a, b):
    """Distance combining the cross-finger and cross-knuckle reductions.

    Both 20-element inputs are arranged as a 5x4 grid, one row per digit
    (rows 0-3 treated as the four fingers, row 4 as the thumb). In every row
    the first two columns (knuckle features) are replaced by the mean of
    their mismatch flags, leaving three values per digit. The four finger
    rows are then averaged column-wise, the thumb row is added in full, and
    the three column totals are summed. The largest possible value is 6
    (all-zeros vs. all-ones).

    Args:
        a: Sequence of 20 feature values (list or array).
        b: Sequence of 20 feature values (list or array).

    Returns:
        NumPy float: the combined distance.
    """
    grid_a = np.array(a).reshape(5, 4)
    grid_b = np.array(b).reshape(5, 4)

    # True wherever the two handshapes disagree.
    mismatch = grid_a != grid_b

    # Step 1 (knuckles): one averaged knuckle value per digit + 2 other flags.
    knuckle_share = mismatch[:, :2].mean(axis=1, keepdims=True)
    per_digit = np.concatenate((knuckle_share, mismatch[:, 2:]), axis=1)

    # Step 2 (fingers): average the four finger rows; keep the thumb row whole.
    finger_share = per_digit[:4].mean(axis=0)
    thumb_part = per_digit[4:]

    return (finger_share + thumb_part).sum()

### Othello Matrix Template

In [153]:
np.array(identifier_mapping["feature_names"]).reshape(5,4)

array([['pinkie_k1', 'pinkie_k2', 'pinkie_splay', 'pinkie_tips'],
       ['ring_k1', 'ring_k2', 'ring_splay', 'ring_tips'],
       ['middle_k1', 'middle_k2', 'middle_splay', 'middle_tips'],
       ['index_k1', 'index_k2', 'index_splay', 'index_tips'],
       ['thumb_k1', 'thumb_k2', 'thumb_splay', 'thumb_tips']],
      dtype='<U12')

## Test item: x and y

In [133]:
all_same_edit(identifier_mapping["x"], identifier_mapping["y"])

7.0

In [134]:
fingerless_edit(identifier_mapping["x"], identifier_mapping["y"])

4.0

In [135]:
knuckleless_edit(identifier_mapping["x"], identifier_mapping["y"])

4.5

In [136]:
fingerless_knuckleless_edit(identifier_mapping["x"], identifier_mapping["y"])

3.0

## Maximally different (20\*0 vs 20\*1)

In [147]:
a = np.zeros(20, dtype=int)
b = np.ones(20, dtype=int)

In [149]:
all_same_edit(a, b)

20.0

In [150]:
fingerless_edit(a, b)

8.0

In [151]:
knuckleless_edit(a, b)

15.0

In [152]:
fingerless_knuckleless_edit(a, b)

6.0

## Compute distance scores and append them to the prediction results

In [174]:
# Score the `test_mono` prediction spreadsheet with all four distance measures.
# NOTE(review): the sheet is read with index_col=0 and written with
# index=False, so the first input column is not carried into the output file
# -- confirm this is intended.
tmdt = pd.read_excel(
    os.path.join(pred_results_dir, 'HKSL_Prediction_Cynthia_test_mono.xlsx'),
    index_col=0,
)

# One distance column per measure, comparing each row's target and predicted
# handshapes through their feature vectors.
for dist_col, dist_fn in (
    ("all_same_edit_dist", all_same_edit),
    ("fingerless_edit_dist", fingerless_edit),
    ("knuckleless_edit_dist", knuckleless_edit),
    ("fingerless_knuckleless_edit_dist", fingerless_knuckleless_edit),
):
    # fn=dist_fn binds the current measure; a plain closure would late-bind.
    tmdt[dist_col] = tmdt.apply(
        lambda row, fn=dist_fn: fn(identifier_mapping[row["target"]], identifier_mapping[row["predicted"]]),
        axis=1,
    )

# Normalise each distance by the value that measure gives for all-zeros vs.
# all-ones vectors (20, 8, 15 and 6 in the "Maximally different" section).
for ratio_col, dist_col, max_dist in (
    ("asd_ratio", "all_same_edit_dist", 20),
    ("fd_ratio", "fingerless_edit_dist", 8),
    ("kd_ratio", "knuckleless_edit_dist", 15),
    ("fkd_ratio", "fingerless_knuckleless_edit_dist", 6),
):
    tmdt[ratio_col] = tmdt[dist_col] / max_dist

tmdt.to_excel(
    os.path.join(pred_results_dir, 'HKSL_Prediction_Cynthia_test_mono_with_distance.xlsx'),
    index=False,
)

In [173]:
# Score the `test_poly` prediction spreadsheet with all four distance measures.
# Rows without a target cannot be scored; they get the sentinel -1 in every
# distance column AND every ratio column.
# NOTE(review): the sheet is read with index_col=0 and written with
# index=False, so the first input column is not carried into the output file
# -- confirm this is intended.
tmdt = pd.read_excel(
    os.path.join(pred_results_dir, 'HKSL_Prediction_Cynthia_test_poly.xlsx'),
    index_col=0,
)
tmdt['target'] = tmdt['target'].fillna('nan')
missing_target = tmdt['target'] == 'nan'

# One distance column per measure; rows with no target are marked -1.
for dist_col, dist_fn in (
    ("all_same_edit_dist", all_same_edit),
    ("fingerless_edit_dist", fingerless_edit),
    ("knuckleless_edit_dist", knuckleless_edit),
    ("fingerless_knuckleless_edit_dist", fingerless_knuckleless_edit),
):
    # fn=dist_fn binds the current measure; a plain closure would late-bind.
    tmdt[dist_col] = tmdt.apply(
        lambda row, fn=dist_fn: (
            fn(identifier_mapping[row["target"]], identifier_mapping[row["predicted"]])
            if row["target"] != "nan"
            else -1
        ),
        axis=1,
    )

# Normalise each distance by the value that measure gives for all-zeros vs.
# all-ones vectors (20, 8, 15 and 6 in the "Maximally different" section).
# Dividing the -1 sentinel would yield meaningless ratios such as -0.05 or
# -0.125, so rows without a target keep the plain -1 sentinel instead.
for ratio_col, dist_col, max_dist in (
    ("asd_ratio", "all_same_edit_dist", 20),
    ("fd_ratio", "fingerless_edit_dist", 8),
    ("kd_ratio", "knuckleless_edit_dist", 15),
    ("fkd_ratio", "fingerless_knuckleless_edit_dist", 6),
):
    tmdt[ratio_col] = (tmdt[dist_col] / max_dist).mask(missing_target, -1)

tmdt.to_excel(
    os.path.join(pred_results_dir, 'HKSL_Prediction_Cynthia_test_poly_with_distance.xlsx'),
    index=False,
)

In [175]:
# Score the `train` prediction spreadsheet with all four distance measures.
# NOTE(review): the sheet is read with index_col=0 and written with
# index=False, so the first input column is not carried into the output file
# -- confirm this is intended.
tmdt = pd.read_excel(
    os.path.join(pred_results_dir, 'HKSL_Prediction_Cynthia_train.xlsx'),
    index_col=0,
)

# One distance column per measure, comparing each row's target and predicted
# handshapes through their feature vectors.
for dist_col, dist_fn in (
    ("all_same_edit_dist", all_same_edit),
    ("fingerless_edit_dist", fingerless_edit),
    ("knuckleless_edit_dist", knuckleless_edit),
    ("fingerless_knuckleless_edit_dist", fingerless_knuckleless_edit),
):
    # fn=dist_fn binds the current measure; a plain closure would late-bind.
    tmdt[dist_col] = tmdt.apply(
        lambda row, fn=dist_fn: fn(identifier_mapping[row["target"]], identifier_mapping[row["predicted"]]),
        axis=1,
    )

# Normalise each distance by the value that measure gives for all-zeros vs.
# all-ones vectors (20, 8, 15 and 6 in the "Maximally different" section).
for ratio_col, dist_col, max_dist in (
    ("asd_ratio", "all_same_edit_dist", 20),
    ("fd_ratio", "fingerless_edit_dist", 8),
    ("kd_ratio", "knuckleless_edit_dist", 15),
    ("fkd_ratio", "fingerless_knuckleless_edit_dist", 6),
):
    tmdt[ratio_col] = tmdt[dist_col] / max_dist

tmdt.to_excel(
    os.path.join(pred_results_dir, 'HKSL_Prediction_Cynthia_train_with_distance.xlsx'),
    index=False,
)