In [None]:
from generator import JAVHModel, JAModel
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Notebook-wide matplotlib text sizes, applied once so every figure below
# shares the same typography.
font_sizes = {
    'font.size': 12,         # base size for any text without its own setting
    'axes.labelsize': 14,    # x/y axis labels
    'axes.titlesize': 16,    # per-axes (subplot) titles
    'xtick.labelsize': 12,   # tick labels on the x axis
    'ytick.labelsize': 12,   # tick labels on the y axis
    'legend.fontsize': 12,   # legend entries
    'figure.titlesize': 18,  # figure-level title
}
plt.rcParams.update(font_sizes)



In [None]:
# Both model families are loaded from the same CSV output directory, so the
# path is written once. The loaders' results are indexed, measured with len()
# and iterated further down, i.e. they are used as sequences of models.
OUTPUT_DIR = '../../output'

modelsJAVH = JAVHModel.load_from_csvs(OUTPUT_DIR)
modelsJA = JAModel.load_from_csvs(OUTPUT_DIR)

In [None]:
# Summary statistics of the 'mape' column for the last loaded JAVH model.
last_javh_mape = modelsJAVH[-1].results_df['mape']
last_javh_mape.describe()

In [None]:
from generator import javh_groups

In [None]:
# Inspect one system with the last loaded JAVH model.
# Author's notes for this example (presumably describing entry 29 - confirm):
#   solute:    Artesunate
#   solvent 1: propane-1,2-diol (passed below as 'Propylene Glycol')
#   solvent 2: Water
example_index = 29
last_javh_model = modelsJAVH[-1]

last_javh_model.plot_VH(example_index, solvent_1='Propylene Glycol', solvent_2='Water')
last_javh_model.plot(example_index, all_experimental_data=False)


# Jouyban-Acree Model Statistics

In [None]:
# Paired t-tests between consecutive Jouyban-Acree models.
#
# Each adjacent pair (modelsJA[i], modelsJA[i + 1]) is compared with
# `paired_t_test`, called on the later model with the earlier one as argument.
# One summary row per pair is collected, and the mean of the returned
# differences is plotted against the earlier model's point count; pairs whose
# p-value is below the significance threshold are highlighted.

# Threshold used to flag a comparison as significant.
# The legend labels further down are literal strings - keep them in sync.
SIGNIFICANCE_LEVEL = 0.025

comparison_results = []
for i in range(len(modelsJA) - 1):
    model1 = modelsJA[i]
    model2 = modelsJA[i + 1]

    # Unpacked as (t statistic, p-value, differences); `diff` is presumably the
    # per-system MAPE difference between the two models - confirm in generator.
    t_stat, p_value, diff = model2.paired_t_test(model1, verbose=False)

    comparison_results.append({
        # JA_number + 2 is what the original author labels "actual data points".
        'model1_points': model1.JA_number + 2,
        'model2_points': model2.JA_number + 2,
        'mape_improvement': diff.mean(),
        't_statistic': abs(t_stat),  # magnitude only; the sign is dropped
        'p_value': p_value,
        'significant': p_value < SIGNIFICANCE_LEVEL,
    })

comparison_df = pd.DataFrame(comparison_results)

if comparison_df.empty:
    # With fewer than two models there is nothing to compare. An empty frame
    # has no columns, so the filtering below would fail with a KeyError.
    print('No consecutive JA model pairs to compare (fewer than two models loaded).')
else:
    # Explicit bool mask so that `~` is a logical negation whatever dtype
    # pandas inferred for the column.
    is_significant = comparison_df['significant'].astype(bool)
    significant = comparison_df[is_significant]
    non_significant = comparison_df[~is_significant]

    print(comparison_df)

    fig, ax = plt.subplots(figsize=(16 * 1.3 / 3, 9 * 1.3 / 3))

    # Grey dots: comparisons that did not reach significance.
    ax.plot(non_significant['model1_points'], non_significant['mape_improvement'],
            'o', color='gray', markersize=8,
            label='p > .025')

    # Red stars: significant comparisons, drawn on top of everything else.
    ax.scatter(significant['model1_points'], significant['mape_improvement'],
               s=100, color='red', zorder=5, marker='*',
               label='p < .025')

    # Zero line separates improvement from deterioration.
    ax.axhline(y=0, color='grey', linestyle='--')

    # Label every point with the point count of the model it was compared to.
    # The text goes above the marker for strongly negative values and below
    # it otherwise.
    for row in comparison_df.itertuples(index=False):
        y_offset = 10 if row.mape_improvement < -0.4 else -20
        ax.annotate(f"→ {row.model2_points}",
                    (row.model1_points, row.mape_improvement),
                    textcoords="offset points",
                    xytext=(0, y_offset),
                    ha='center')

    ax.set_xlabel('Comparison Data Point')
    ax.set_ylabel('Δ MAPE (%)')
    ax.grid(True, alpha=0.3)
    ax.legend()
    fig.tight_layout()
    plt.show()

In [None]:
# Visual check of each consecutive pair of JA models: print both models, plot
# entry 2 with each of them, then run the paired t-test with its default
# verbosity.
# NOTE(review): `results` is created here but never filled in this cell, and
# `p_value` / `diff` are not used afterwards in the visible cells.
result_columns = ['data points', 'p_value', 'f1_score', 'precision', 'recall']
results = pd.DataFrame(columns=result_columns)

for later_idx in range(1, len(modelsJA)):
    earlier_model = modelsJA[later_idx - 1]
    later_model = modelsJA[later_idx]

    print(f'{earlier_model} \ncompared to \n{later_model}')
    for model in (earlier_model, later_model):
        model.plot(2)

    _, p_value, diff = later_model.paired_t_test(earlier_model)
    


# Statistics for the van't Hoff Jouyban-Acree Model

In [None]:
# JA_number to hold fixed: the next cell keeps only the JAVH models whose
# JA_number equals this value before comparing consecutive models.
num = 7

In [None]:
# Paired t-tests between consecutive van't Hoff Jouyban-Acree (JAVH) models
# at a fixed JA_number.
#
# Only models with JA_number == num are kept. Each adjacent pair of the
# remaining models is compared with `paired_t_test`, called on the later model
# with the earlier one as argument. One summary row per pair is collected and
# plotted against the earlier model's point count; pairs whose p-value is
# below the significance threshold are highlighted.

# Threshold used to flag a comparison as significant.
# The legend labels further down are literal strings - keep them in sync.
SIGNIFICANCE_LEVEL = 0.025

fixed_modelsJAVH = [model for model in modelsJAVH if model.JA_number == num]

comparison_results = []
for i in range(len(fixed_modelsJAVH) - 1):
    model1 = fixed_modelsJAVH[i]
    model2 = fixed_modelsJAVH[i + 1]

    # Unpacked as (t statistic, p-value, differences); `diff` is presumably the
    # per-system MAPE difference between the two models - confirm in generator.
    t_stat, p_value, diff = model2.paired_t_test(model1, verbose=False)

    comparison_results.append({
        # VH_number * 2 is what the original author labels "actual data points".
        'model1_points': model1.VH_number * 2,
        'model2_points': model2.VH_number * 2,
        # NOTE(review): this cell summarises with the median, while the JA
        # comparison cell uses the mean - confirm the difference is intended.
        'mape_improvement': diff.median(),
        't_statistic': abs(t_stat),  # magnitude only; the sign is dropped
        'p_value': p_value,
        'significant': p_value < SIGNIFICANCE_LEVEL,
    })

comparison_df = pd.DataFrame(comparison_results)

if comparison_df.empty:
    # Fewer than two models matched the filter, so there are no pairs. An
    # empty frame has no columns and the filtering below would fail with a
    # KeyError.
    print(f'No consecutive JAVH model pairs to compare for JA_number == {num}.')
else:
    # Explicit bool mask so that `~` is a logical negation whatever dtype
    # pandas inferred for the column.
    is_significant = comparison_df['significant'].astype(bool)
    significant = comparison_df[is_significant]
    non_significant = comparison_df[~is_significant]

    print(comparison_df)

    fig, ax = plt.subplots(figsize=(16 * 1.3 / 3, 9 * 1.3 / 3))

    # Grey dots: comparisons that did not reach significance.
    ax.plot(non_significant['model1_points'], non_significant['mape_improvement'],
            'o', color='gray', markersize=8,
            label='p>0.025')

    # Red stars: significant comparisons, drawn on top of everything else.
    ax.scatter(significant['model1_points'], significant['mape_improvement'],
               s=100, color='red', zorder=5, marker='*',
               label='p<0.025')

    # Zero line separates improvement from deterioration.
    ax.axhline(y=0, color='grey', linestyle='--')

    # Label every point with the point count of the model it was compared to.
    # The text goes above the marker for clearly negative values and below it
    # otherwise.
    for row in comparison_df.itertuples(index=False):
        y_offset = 10 if row.mape_improvement < -0.03 else -30
        ax.annotate(f"→ {row.model2_points}",
                    (row.model1_points, row.mape_improvement),
                    textcoords="offset points",
                    xytext=(0, y_offset),
                    ha='center')

    ax.set_xlabel('Comparison Data Point')
    ax.set_ylabel('Δ MAPE (%)')
    ax.grid(True, alpha=0.3)
    ax.legend()
    fig.tight_layout()
    plt.show()