In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import re
import os

In [None]:
def get_sorted_csv_files(directory):
    """Return the ``true_estimate_NNNN.csv`` file names in ``directory``, ordered by NNNN.

    Parameters
    ----------
    directory : str
        Directory whose entries are listed (non-recursively) with ``os.listdir``.

    Returns
    -------
    list of str
        Bare entry names (not joined with ``directory``) whose *entire* name is
        ``true_estimate_`` + exactly four digits + ``.csv``, sorted in
        ascending order of the four-digit number.
    """
    # Define the regular expression pattern
    pattern = re.compile(r'true_estimate_(\d{4})\.csv')

    numbered_files = []
    for file_name in os.listdir(directory):
        # fullmatch (rather than match) also anchors the end of the name, so
        # leftovers such as "true_estimate_0001.csv.bak" are not picked up.
        match = pattern.fullmatch(file_name)
        if match:
            numbered_files.append((int(match.group(1)), file_name))

    # Sort by the 4-digit number
    numbered_files.sort(key=lambda item: item[0])

    return [file_name for _, file_name in numbered_files]

def read_csv_files(directory, file_list):
    """Load every file named in ``file_list`` from ``directory`` with ``pd.read_csv``.

    Returns a list of DataFrames in the same order as ``file_list``.
    """
    return [pd.read_csv(os.path.join(directory, name)) for name in file_list]

In [None]:
# Build the relative input directory: <network_name>/<system_name>/true_estimate/<seed>
network_name = "result_network10_6"
seed="seed0"
system_name = "BO_system_CT"
directory_path=os.path.join(network_name, system_name,"true_estimate",seed)

# Collect the matching CSV names in numeric order, then load one DataFrame per file
sorted_file_list = get_sorted_csv_files(directory_path)
data_frame_list = read_csv_files(directory_path, sorted_file_list)


# Print every loaded DataFrame in full, in file order
for df in data_frame_list:
    print(df)

In [None]:
def plot_true_estimate(data_frame_list,title='True vs Estimate'):
    """Scatter ``estimate`` against ``true`` for every frame on a single figure.

    Each frame is drawn in its own colour, taken from a ``viridis`` colormap
    sampled at ``len(data_frame_list)`` levels (list position -> colour). A
    dashed gray ``y = x`` reference line and a colour bar normalised to
    ``0 .. len(data_frame_list)`` are added, then the figure is shown.

    Parameters
    ----------
    data_frame_list : list of pandas.DataFrame
        Frames providing ``'true'`` and ``'estimate'`` columns.
    title : str, optional
        Figure title.

    Raises
    ------
    ValueError
        If ``data_frame_list`` is empty.
    """
    n_frames = len(data_frame_list)
    if n_frames == 0:
        # Fail before any figure is created instead of deep inside min()/max().
        raise ValueError("data_frame_list must contain at least one data frame.")

    # Retrieve the colormap. plt.get_cmap is used because cm.get_cmap is no
    # longer provided by recent matplotlib releases.
    cmap = plt.get_cmap('viridis', n_frames)

    fig, ax = plt.subplots(figsize=(7, 6))

    for idx, df in enumerate(data_frame_list):
        color = cmap(idx / n_frames)
        ax.scatter(df['true'], df['estimate'], color=color,marker=".",alpha=0.5)

    # Plot the gray y=x line over the range of 'true' across all frames
    min_val = min(df['true'].min() for df in data_frame_list)
    max_val = max(df['true'].max() for df in data_frame_list)
    ax.plot([min_val, max_val], [min_val, max_val], color='gray', linestyle='--')

    # Add a color bar. The mappable is not attached to any artist, so the
    # axes to take space from must be passed explicitly.
    norm = plt.Normalize(0, n_frames)
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])
    fig.colorbar(sm, ax=ax)

    ax.set_xlabel('True')
    ax.set_ylabel('Estimate')
    ax.set_title(title)
    plt.show()

In [None]:
# Scatter true vs. estimate for every loaded file on one figure
plot_true_estimate(data_frame_list, title='True vs Estimate CT')

In [None]:
def plot_true_estimate_one(data_frame_list,index=0,title='True vs Estimate'):
    """Scatter ``estimate`` against ``true`` for a single frame of the list.

    The frame at position ``index`` is drawn in the colour that position has
    in a ``viridis`` colormap sampled at ``len(data_frame_list)`` levels. The
    dashed gray ``y = x`` line and the colour bar are still derived from the
    whole list (range of ``'true'`` over all frames, ``0 .. len``).

    Parameters
    ----------
    data_frame_list : list of pandas.DataFrame
        Frames providing ``'true'`` and ``'estimate'`` columns.
    index : int, optional
        Position of the frame to plot. Negative values count from the end.
    title : str, optional
        Figure title.

    Raises
    ------
    IndexError
        If ``index`` is out of range for ``data_frame_list``.
    """
    n_frames = len(data_frame_list)

    # Look the frame up first so a bad index fails before a figure is created.
    df = data_frame_list[index]
    # Normalise negative indices so the colour matches the frame actually plotted.
    idx = index % n_frames

    # Retrieve the colormap. plt.get_cmap is used because cm.get_cmap is no
    # longer provided by recent matplotlib releases.
    cmap = plt.get_cmap('viridis', n_frames)

    fig, ax = plt.subplots(figsize=(7, 6))

    color = cmap(idx / n_frames)
    ax.scatter(df['true'], df['estimate'], color=color,marker=".",alpha=0.5)

    # Plot the gray y=x line over the range of 'true' across all frames
    min_val = min(frame['true'].min() for frame in data_frame_list)
    max_val = max(frame['true'].max() for frame in data_frame_list)
    ax.plot([min_val, max_val], [min_val, max_val], color='gray', linestyle='--')

    # Add a color bar. The mappable is not attached to any artist, so the
    # axes to take space from must be passed explicitly.
    norm = plt.Normalize(0, n_frames)
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])
    fig.colorbar(sm, ax=ax)

    ax.set_xlabel('True')
    ax.set_ylabel('Estimate')
    ax.set_title(title)
    plt.show()

In [None]:
# plot_true_estimate_one(data_frame_list, 0, title='True vs Estimate CT(0)')
# plot_true_estimate_one(data_frame_list, 99, title='True vs Estimate CT(99)')

In [None]:
def plot_true_histgram(data_frame_list,title='True vs Estimate'):
    """Overlay log-count histograms of the ``'true'`` column of every frame.

    Each frame is binned independently into 100 bins and drawn
    semi-transparently in its own colour from a ``viridis`` colormap sampled
    at ``len(data_frame_list)`` levels. A colour bar normalised to
    ``0 .. len(data_frame_list)`` is added, then the figure is shown.

    Parameters
    ----------
    data_frame_list : list of pandas.DataFrame
        Frames providing a ``'true'`` column.
    title : str, optional
        Figure title.

    Raises
    ------
    ValueError
        If ``data_frame_list`` is empty.
    """
    n_frames = len(data_frame_list)
    if n_frames == 0:
        raise ValueError("data_frame_list must contain at least one data frame.")

    # Retrieve the colormap. plt.get_cmap is used because cm.get_cmap is no
    # longer provided by recent matplotlib releases.
    cmap = plt.get_cmap('viridis', n_frames)

    fig, ax = plt.subplots(figsize=(7, 6))

    for idx, df in enumerate(data_frame_list):
        color = cmap(idx / n_frames)
        ax.hist(df['true'], bins=100,color=color,alpha=0.5,log=True)

    # Add a color bar. The mappable is not attached to any artist, so the
    # axes to take space from must be passed explicitly.
    norm = plt.Normalize(0, n_frames)
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])
    fig.colorbar(sm, ax=ax)

    ax.set_xlabel('Curie Temperature')
    ax.set_ylabel('Count')
    ax.set_title(title)
    plt.show()

def plot_histogram_difference(data_frame_list, title='Histogram Difference'):
    """Plot the per-bin count difference of ``'true'`` between the first and last frame.

    Both frames are binned on the same 100 bin edges and the difference
    ``first - last`` is drawn as a histogram. The difference array and its
    total are also printed.

    Parameters
    ----------
    data_frame_list : list of pandas.DataFrame
        Frames providing a ``'true'`` column; only the first and last are used.
    title : str, optional
        Figure title.

    Raises
    ------
    ValueError
        If ``data_frame_list`` holds fewer than two frames.
    """
    if len(data_frame_list) < 2:
        raise ValueError("data_frame_list must contain at least two data frames.")

    # Retrieve the 'true' column of the first and last data frames
    true_first = data_frame_list[0]['true']
    true_last = data_frame_list[-1]['true']

    # Derive the edges from both frames together so that values of the last
    # frame lying outside the first frame's range are still counted. When the
    # last frame lies within the first frame's range the edges are the same
    # as binning the first frame alone.
    bin_edges = np.histogram_bin_edges(
        np.concatenate([true_first, true_last]), bins=100
    )
    hist_first, _ = np.histogram(true_first, bins=bin_edges)
    hist_last, _ = np.histogram(true_last, bins=bin_edges)

    # Calculate the difference
    hist_diff = hist_first - hist_last
    print(hist_diff)
    print(hist_diff.sum())

    # Plot the difference: one pseudo-sample at each bin's left edge, weighted
    # by that bin's difference, reproduces the precomputed bar heights.
    fig, ax = plt.subplots(figsize=(7, 6))
    ax.hist(bin_edges[:-1], bin_edges, weights=hist_diff, alpha=0.5)

    ax.set_xlabel('True')
    ax.set_ylabel('Difference in Count')
    ax.set_title(title)
    plt.show()

In [None]:
# Overlay histograms of the 'true' column for every loaded file
plot_true_histgram(data_frame_list, title='True histgram CT')

In [None]:
# Plot the per-bin difference in 'true' counts between the first and last loaded files
plot_histogram_difference(data_frame_list, title='Histogram searched CT')

In [None]:
def calculate_mae(data_frame_list):
    """Return one mean absolute error per frame.

    For each frame the error is the mean of ``|true - estimate|`` over its
    rows; results are returned as a list in the same order as the input.
    """
    return [
        np.mean(np.abs(frame['true'] - frame['estimate']))
        for frame in data_frame_list
    ]

def plot_mae(mae_list):
    """Draw the MAE values as a blue line with circle markers against list position."""
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(mae_list, marker='o', linestyle='-', color='b')
    ax.set_xlabel('File Index')
    ax.set_ylabel('MAE')
    ax.set_title('Mean Absolute Error (MAE) for Each File')
    plt.show()

In [None]:
# Calculate the MAE between 'true' and 'estimate' for each loaded file
mae_list = calculate_mae(data_frame_list)
# Plot the MAE values against file index
plot_mae(mae_list)