In [5]:
import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys, os

In [22]:
# Directory that holds the per-model distance CSV files; the generated plots
# are written back into this same directory.
all_dist_file_dir = "/Users/kiyoshi/Desktop/jov_everything/face_morph_v4_5_sets_dist/"

# Frame values passed to plot_hist as the "training A" subset.
frame_A = [1, 2, 4, 5, 6]

# Frame values for the "training B" subset: `uniform_B` is used for files
# whose name ends in "_uniform", `enriched_B` for every other CSV file.
uniform_B = [86, 101, 111, 121, 132]
enriched_B = [84, 86, 87, 100, 142]

# Frame values passed to plot_hist as the testing subset.
test = [
    36, 38, 40, 42, 45, 47, 50, 52, 55, 57,
    60, 62, 65, 67, 70, 72, 74, 76, 78, 80,
]

In [62]:
def plot_hist(distance_file_path,
             save_dir,
             index_A,
             index_B,
             index_test):
    """
    Plot and save how ``dist_to_A`` is distributed over three frame subsets.

    The distance CSV is read, its five columns are relabelled to
    ``ind, morph_name, frame, dist_to_A, dist_to_B``, and the rows are split
    by ``frame`` membership into training A, training B and testing.  Summary
    statistics of ``dist_to_A`` are printed for each subset, then two PNG
    files are written into ``save_dir``:

    * ``<csv name without extension>_data_plot.png`` -- each subset's
      distances drawn along its own horizontal level (training A at y=0,
      testing at y=1, training B at y=2).
    * ``<csv name without extension>_hist.png`` -- three stacked 20-bin
      histograms (training A, training B, testing) sharing both axes.

    Parameters
    ----------
    distance_file_path : str
        Path of the distance CSV.  It must have exactly five columns,
        otherwise relabelling the columns raises.
    save_dir : str
        Directory the two PNG files are written into (not created here).
    index_A, index_B, index_test : list-like
        ``frame`` values selecting the training A, training B and testing
        rows respectively.

    Returns
    -------
    None
    """
    # Load distance CSV file and keep only the columns that get plotted
    data = pd.read_csv(distance_file_path)
    data.columns = ['ind', 'morph_name', 'frame', 'dist_to_A', 'dist_to_B']
    data = data.drop(columns=['ind', 'morph_name'])

    # Find data frames for training A, training B and testing
    training_A_df = data.loc[data['frame'].isin(index_A)].set_index('frame')
    training_B_df = data.loc[data['frame'].isin(index_B)].set_index('frame')
    test_df = data.loc[data['frame'].isin(index_test)].set_index('frame')

    dist_A = training_A_df['dist_to_A']
    dist_B = training_B_df['dist_to_A']
    dist_test = test_df['dist_to_A']

    # Get some stats
    print(dist_A.describe())
    print(dist_B.describe())
    print(dist_test.describe())

    # Output names are built from the CSV file name with its extension removed
    file_stem = os.path.splitext(os.path.basename(distance_file_path))[0]

    # Plot data: one horizontal level per subset.  Each y-vector is sized from
    # its own series so subsets of different lengths do not break the plot.
    data_fig, data_ax = plt.subplots()
    for series, level, fmt, label in ((dist_A, 0, "-r", "training_A"),
                                      (dist_B, 2, "-g", "training_B"),
                                      (dist_test, 1, "-b", "test")):
        data_ax.plot(series, [level] * len(series), fmt, label=label)
    data_ax.legend(loc="lower right")

    # Save plot, then close the figure so it neither leaks nor gets reused
    # as the "current figure" by a later call.
    data_fig.savefig(os.path.join(save_dir, file_stem + "_data_plot.png"))
    plt.close(data_fig)

    # Plot histograms to see the data distribution
    hist_fig, hist_axes = plt.subplots(nrows=3, ncols=1, figsize=(6, 9),
                                       sharey=True, sharex=True)
    for hist_ax, series, label, title in zip(hist_axes,
                                             (dist_A, dist_B, dist_test),
                                             ("training_A", "training_B", "test"),
                                             ("Training A", "Training B", "Testing")):
        # rwidth is the bar width relative to the bin width; an absolute
        # width of 0.8 data units would be wider than the whole data range.
        hist_ax.hist(series, bins=20, label=label, rwidth=0.8)
        hist_ax.set_title(title)

    hist_fig.tight_layout()

    hist_fig.savefig(os.path.join(save_dir, file_stem + "_hist.png"))
    plt.close(hist_fig)

In [63]:
# Generate the plots for every distance CSV found in the distance directory.
all_dist_files = os.listdir(all_dist_file_dir)

for one_file in all_dist_files:
    # Anything that is not a CSV file is ignored.
    if not one_file.endswith(".csv"):
        continue

    dist_file_path = os.path.join(all_dist_file_dir, one_file)
    print("Current file: ", dist_file_path)
    print("Model and config: ", one_file)

    # The last "_"-separated token of the name (before the first ".") tells
    # whether this file is for the uniform or the enriched-tail configuration.
    config_tag = one_file.split(".")[0].split("_")[-1]
    train_B = uniform_B if config_tag == "uniform" else enriched_B

    plot_hist(distance_file_path=dist_file_path,
              save_dir=all_dist_file_dir,
              index_A=frame_A,
              index_B=train_B,
              index_test=test)

Current file:  /Users/kiyoshi/Desktop/jov_everything/face_morph_v4_5_sets_dist/facenet_uniform.csv
Model and config:  facenet_uniform.csv
count    450.000000
mean       0.024027
std        0.014158
min        0.000000
25%        0.017688
50%        0.026642
75%        0.033463
max        0.061044
Name: dist_to_A, dtype: float64
count    450.000000
mean       0.298613
std        0.077429
min        0.117303
25%        0.242706
50%        0.289646
75%        0.349069
max        0.576735
Name: dist_to_A, dtype: float64
count    1800.000000
mean        0.201630
std         0.059214
min         0.076758
25%         0.158018
50%         0.195766
75%         0.235922
max         0.402377
Name: dist_to_A, dtype: float64
Current file:  /Users/kiyoshi/Desktop/jov_everything/face_morph_v4_5_sets_dist/facenet_enriched_tail.csv
Model and config:  facenet_enriched_tail.csv
count    450.000000
mean       0.024027
std        0.014158
min        0.000000
25%        0.017688
50%        0.026642
75%     

<Figure size 432x288 with 0 Axes>

<Figure size 432x648 with 0 Axes>

<Figure size 432x648 with 0 Axes>

<Figure size 432x648 with 0 Axes>

<Figure size 432x648 with 0 Axes>

<Figure size 432x648 with 0 Axes>

<Figure size 432x648 with 0 Axes>

<Figure size 432x648 with 0 Axes>

<Figure size 432x648 with 0 Axes>

<Figure size 432x648 with 0 Axes>

<Figure size 432x648 with 0 Axes>