In [30]:
import pandas as pd
import numpy as np
import os
import sys

# Resolve the utils directory two levels above the current working directory
# and put it first on the module search path. This must run before the
# `teeth_utils` import that follows (presumably that module lives in this
# directory -- TODO confirm).
utils_path = os.path.abspath('../../utils')
sys.path.insert(0, utils_path)

from teeth_utils import normalise_teeth_distances

In [31]:
# Display settings: no cap on the number of columns or rows rendered.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
# None switches pandas' chained-assignment check off entirely.
pd.set_option('mode.chained_assignment', None)

In [32]:
# Input file locations, relative to the directory the notebook is run from.
# CSV sheet that is read into `meridian_df` further down.
MERIDIAN_PATH = "../../data/life_data/life_sheet_final.csv"
# JSON files that are read into `teeth_up` / `teeth_down` and then concatenated.
TEETH_UP_PATH = "../../data/life_data/teeth_up_tracked_data.json"
TEETH_DOWN_PATH = "../../data/life_data/teeth_down_tracked_data.json"

In [33]:
# Read the "up" and "down" tracked-teeth files and stack them into one frame
# whose index is renumbered 0..n-1.
teeth_up = pd.read_json(TEETH_UP_PATH)
teeth_down = pd.read_json(TEETH_DOWN_PATH)
teeth_df = pd.concat([teeth_up, teeth_down], ignore_index=True)

In [34]:
def _normalise_entry(entry):
    """Return a new tooth record with its positions run through normalise_teeth_distances."""
    return {
        'toothId': entry['toothId'],
        'toothPositions': normalise_teeth_distances(entry['toothPositions']),
    }


# Replace every record in the "data" column with its normalised counterpart.
teeth_df["data"] = teeth_df["data"].apply(_normalise_entry)

In [35]:
# set notgrownout teeth positions to 0
# Row labels of teeth_df whose records get their positions zeroed.
NOT_GROWN_OUT_ROWS = (0, 15, 16, 31)


def _zero_positions(entry):
    """Return a new tooth record with every position replaced by 0.0.

    The 'toothId' is kept and 'toothPositions' keeps its length.
    """
    return {
        "toothId": entry["toothId"],
        "toothPositions": [0.0 for _ in entry["toothPositions"]],
    }


# Fail loudly (as the per-row lookup did) if an expected row is absent.
_missing_rows = [label for label in NOT_GROWN_OUT_ROWS if label not in teeth_df.index]
if _missing_rows:
    raise KeyError("teeth_df has no rows labelled {}".format(_missing_rows))

# Assign the whole column in one step. Writing through teeth_df["data"][i] is
# chained assignment: under pandas Copy-on-Write it modifies a temporary and
# leaves teeth_df untouched, and the warning about it is silenced in this notebook.
teeth_df["data"] = [
    _zero_positions(entry) if label in NOT_GROWN_OUT_ROWS else entry
    for label, entry in teeth_df["data"].items()
]


In [36]:
def _parse_tooth_numbers(spec):
    """Split a dash-separated string into a list of ints, e.g. "1-2" -> [1, 2]."""
    return [int(part) for part in spec.split("-")]


# convert string teeth indices to ints
# Rows lacking a tooth number or a domain are discarded before parsing.
meridian_df = pd.read_csv(MERIDIAN_PATH).dropna(subset=["Tooth number", "Domain"])
meridian_df["Tooth number"] = meridian_df["Tooth number"].apply(_parse_tooth_numbers)

In [38]:
# create the training dataset

# column represents data based on one of the multiple braces
# row is all teeth positions associated with a domain and the value of this domain

# Number of tooth records; not used by the construction below.
n_teeth = teeth_df["data"].shape[0]


def build_train_df(teeth_entries, meridian_rows):
    """Assemble the training frame from tooth records and meridian rows.

    Parameters
    ----------
    teeth_entries : iterable of dict
        Tooth records, each with a 'toothId' and a 'toothPositions' sequence
        that is indexed by brace position.
    meridian_rows : pandas.DataFrame
        Frame with a "Tooth number" column holding a collection of tooth ids,
        plus score columns whose names contain 'Brace'.

    Returns
    -------
    pandas.DataFrame
        One row per meridian row and one column per brace position. Each
        populated cell is ``[x, y]``: ``x`` is a ``(n_associated_teeth, 2)``
        float array of positions and ``y`` is the score for that brace.
        Rows with fewer scores than the widest row are padded with NaN.
    """
    teeth_entries = list(teeth_entries)
    records = []

    # loop through meridian rows
    for _, row in meridian_rows.iterrows():
        # get life domains scores
        # NOTE(review): dropna() compacts the scores, so brace_index below is the
        # position among the non-null scores. If a score can be missing in the
        # middle of a row, it no longer lines up with toothPositions -- confirm.
        braces_scores = row.filter(like='Brace').dropna()

        # teeth whose id is listed for this meridian row, in teeth_entries order
        teeth_ids = row["Tooth number"]
        associated_teeth = [
            tooth for tooth in teeth_entries if tooth['toothId'] in teeth_ids
        ]

        record = {}
        for brace_index, score in enumerate(braces_scores):
            # -1.0 is the placeholder; every row is overwritten just below
            x = np.full((len(associated_teeth), 2), -1.0)
            for tooth_index, tooth in enumerate(associated_teeth):
                x[tooth_index] = tooth["toothPositions"][brace_index]
            record[brace_index] = [x, score]

        records.append(record)

    # Build the frame once: DataFrame.append was removed in pandas 2.0, and
    # growing a frame row by row copies it on every iteration.
    return pd.DataFrame(records)


train_df = build_train_df(teeth_df["data"], meridian_df)

23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23
23


In [26]:
# Write the training frame to ./output/train_df.pickle, resolved against the
# current working directory.
out_dir = os.path.abspath('./output')

# exist_ok=True creates the directory when needed without a separate
# exists() check, so there is no gap between the check and the creation.
os.makedirs(out_dir, exist_ok=True)

out_path = os.path.join(out_dir, "train_df.pickle")
# NOTE: pickle files should only be loaded back from trusted sources.
train_df.to_pickle(out_path)