In [None]:
from generator import JAVHModel, JAModel
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

# Global font sizes for every figure in this notebook, set one rc group at a time.
plt.rc('font', size=12)                        # default text size
plt.rc('axes', labelsize=14, titlesize=16)     # axis labels / subplot titles
plt.rc('xtick', labelsize=12)                  # x-axis tick labels
plt.rc('ytick', labelsize=12)                  # y-axis tick labels
plt.rc('legend', fontsize=12)                  # legend text
plt.rc('figure', titlesize=18)                 # figure-level title



In [None]:
# Ten-colour seaborn "colorblind" palette shared by all plots below.
# Leaving `palette` as the last expression renders the swatches in the notebook.
palette = sns.color_palette(palette='colorblind', n_colors=10)
palette

In [None]:
from generator import javh_groups

In [None]:
# Load both model families from the same output directory.
# Each result is used below as an indexable sequence of models (e.g. `[-1]`, `len()`).
modelsJAVH, modelsJA = (
    JAVHModel.load_from_csvs('../../output'),
    JAModel.load_from_csvs('../../output'),
)

In [None]:
# Last JA model, entry 6:
#   3-chloropyrazin-2-amine (compound ID 276224), 283.15 K
modelsJA[-1].plot(
    6,
    solvent_1_name='Ethane-1,2-diol — Water Weight Fraction',
    api='3-chloropyrazin-2-amine Exp. Data',
    color=palette[0],
)


In [None]:
# Last JA model, entry 2:
#   Iminodibenzyl (ID 10308) in ethane-1,2-diol + water, temperature 318.15 K
modelsJA[-1].plot(
    2,
    solvent_1_name='Ethane-1,2-diol — Water Weight Fraction',
    api='Iminodibenzyl Exp. Data',
    color=palette[0],
)

In [None]:
# Last JA model, entry 6: compound ID 276224 at 283.15 K.
# Entry 6 / compound 276224 is 3-chloropyrazin-2-amine, so the legend label
# must name that compound (it previously said 'Iminodibenzyl', which is entry 2).
# 174 / 962: presumably the solvent IDs (ethane-1,2-diol / water) — TODO confirm.
modelsJA[-1].plot(
    6,
    solvent_1_name='Ethane-1,2-diol — Water Weight Fraction',
    api='3-chloropyrazin-2-amine Exp. Data',
    color=palette[0],
)

In [None]:
import matplotlib.gridspec as gridspec

# Four-panel summary figure: two JAVH examples on the top row and two JA
# examples on the bottom row, each drawn with the last model of its family.
fig = plt.figure(figsize=(16*1.3/1.5, 9*1.3/1.5))

# Evenly sized 2x2 grid
gs = gridspec.GridSpec(2, 2, height_ratios=[1, 1], width_ratios=[1, 1])

# Top row (left, right)
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[0, 1])

# Bottom row (left, right)
ax3 = fig.add_subplot(gs[1, 0])
ax4 = fig.add_subplot(gs[1, 1])

modelsJAVH[-1].plot(29,all_experimental_data=False,api_name='Artesunate Exp. Data', solvent_1_name='Propane-1,2-diol — Water Weight Fraction',color=palette[0],ax=ax1)
modelsJAVH[-1].plot(2,all_experimental_data=False, api_name='Sofosbuvir Exp. Data', solvent_1_name='Acetone — Water Weight Fraction',color=palette[0],ax=ax2)
# Iminodibenzyl is entry 2 of the JA data; entry 6 is 3-chloropyrazin-2-amine.
# Plotting entry 6 in both bottom panels would draw the same data twice under
# two different compound labels.
modelsJA[-1].plot(2,solvent_1_name='Ethane-1,2-diol — Water Weight Fraction',api='Iminodibenzyl Exp. Data',color=palette[0],ax=ax3)
modelsJA[-1].plot(6,solvent_1_name='Ethane-1,2-diol — Water Weight Fraction',api='3-chloropyrazin-2-amine Exp. Data',color=palette[0],ax=ax4)

# Show the y tick labels of ax1, ax3 and ax4 multiplied by 1000 and relabel the
# axis accordingly (only the tick text changes, not the plotted data).
# NOTE(review): assumes plot() draws solubility in g/g so that x1000 is mg/g, and
# that leaving ax2 (Sofosbuvir) unscaled is intentional — confirm.
for ax in [ax1, ax3, ax4]:
    # A fresh formatter per axis: a formatter instance is bound to the axis it is set on.
    formatter = plt.FuncFormatter(lambda x, pos: '{:.1f}'.format(x*1000))
    ax.yaxis.set_major_formatter(formatter)
    ax.set_ylabel("Solubility (mg/g)")


# Adjust layout
plt.tight_layout()

# plt.show() rather than fig.show(): the latter only emits a warning on the
# non-GUI inline backend used by notebooks.
plt.show()

In [None]:
# Last JAVH model, entry 29:
#   Artesunate in propane-1,2-diol + water, 298.15 K
modelsJAVH[-1].plot(
    29,
    all_experimental_data=False,
    api_name='Artesunate Exp. Data',
    solvent_1_name='Propane-1,2-diol — Water Weight Fraction',
    color=palette[0],
)


In [None]:
# Last JAVH model, entry 2:
#   Sofosbuvir (45375808); solvent 1 = acetone (180), solvent 2 = water; 298.15 K
modelsJAVH[-1].plot(
    2,
    all_experimental_data=False,
    api_name='Sofosbuvir Exp. Data',
    solvent_1_name='Acetone — Water Weight Fraction',
    color=palette[0],
)


# Jouyban–Acree Model Statistics

In [None]:
# Paired t-tests between consecutive JA models, summarised as a table and a
# bar chart of the mean per-comparison difference returned by paired_t_test.

# Single source of truth for the significance threshold used for the
# 'significant' column and, through it, the bar colours.
SIGNIFICANCE_LEVEL = 0.025

# With fewer than two models there are no pairs; fail with a clear message
# instead of a KeyError on the empty results frame further down.
if len(modelsJA) < 2:
    raise ValueError('Need at least two JA models to compare consecutive pairs.')

# One record per consecutive pair of models
comparison_results = []

for i in range(len(modelsJA) - 1):
    model1 = modelsJA[i]
    model2 = modelsJA[i + 1]

    # Paired t-test of the later model against the earlier one.
    # NOTE(review): `diff` is presumably the per-sample MAPE difference between
    # the two models; its sign convention is defined in paired_t_test — confirm.
    t_stat, p_value, diff = model2.paired_t_test(model1, verbose=False)

    comparison_results.append({
        # JA_number + 2 is presumably the number of data points used — confirm.
        'model1_points': model1.JA_number + 2,
        'model2_points': model2.JA_number + 2,
        'mape_improvement': diff.mean(),
        't_statistic': abs(t_stat),
        'p_value': p_value,
        'significant': p_value < SIGNIFICANCE_LEVEL
    })

# Convert to DataFrame
comparison_df = pd.DataFrame(comparison_results)

# Create visualization using a bar chart
plt.figure(figsize=(16*1.3/3, 9*1.3/3))

# One "a→b" tick label per comparison, read straight from the two point columns
x_labels = [
    f"{first}→{second}"
    for first, second in zip(comparison_df['model1_points'], comparison_df['model2_points'])
]

# Bar colour encodes significance
colors = [palette[0] if sig else palette[7] for sig in comparison_df['significant']]

# Plot bars
bars = plt.bar(x_labels, comparison_df['mape_improvement'], color=colors)

# Add a horizontal line at y=0
plt.axhline(y=0, color=palette[7], linestyle='--')

print(comparison_df)

# Legend built from proxy rectangles. The non-significant class is p >= threshold,
# matching the strict `<` used for the 'significant' column.
plt.legend([plt.Rectangle((0,0),1,1, color=palette[7]), 
            plt.Rectangle((0,0),1,1, color=palette[0])], 
           ['p ≥ .025', 'p < .025'])

plt.xlabel('Model Comparison (Data Points)')
plt.ylabel('Δ MAPE (%)')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()


In [None]:
# Pairwise inspection of consecutive JA models: print both models, plot entry 2
# for each, then run the paired t-test (called here without verbose=False).
# NOTE(review): `results` is created with metric columns but never filled in
# this cell, and p_value / diff are not used afterwards — presumably unfinished.
results = pd.DataFrame(columns=['data points', 'p_value', 'f1_score', 'precision', 'recall'])

for i in range(len(modelsJA) - 1):
    previous_model, next_model = modelsJA[i], modelsJA[i + 1]

    print(f'{previous_model} \ncompared to \n{next_model}')
    previous_model.plot(2)
    next_model.plot(2)

    _, p_value, diff = next_model.paired_t_test(previous_model)
    


# Statistics for the Van't Hoff–Jouyban–Acree Model

In [None]:
# JA_number used to select which JAVH models are compared in the following cell.
num = 7

In [None]:
# Paired t-tests between consecutive JAVH models that share JA_number == num,
# summarised as a table and a bar chart of the per-comparison difference.

# Single source of truth for the significance threshold used for the
# 'significant' column and, through it, the bar colours and hatches.
SIGNIFICANCE_LEVEL = 0.025

# Keep only the JAVH models with the selected JA_number
fixed_modelsJAVH = [model for model in modelsJAVH if model.JA_number == num]

# The filter can leave fewer than two models; fail with a clear message
# instead of a KeyError on the empty results frame further down.
if len(fixed_modelsJAVH) < 2:
    raise ValueError(
        f'Need at least two JAVH models with JA_number == {num} '
        f'to compare consecutive pairs; found {len(fixed_modelsJAVH)}.'
    )

# One record per consecutive pair of models
comparison_results = []

for i in range(len(fixed_modelsJAVH) - 1):
    model1 = fixed_modelsJAVH[i]
    model2 = fixed_modelsJAVH[i + 1]

    # Paired t-test of the later model against the earlier one
    t_stat, p_value, diff = model2.paired_t_test(model1, verbose=False)

    comparison_results.append({
        # VH_number * 2 is presumably the number of data points used — confirm.
        'model1_points': model1.VH_number* 2,
        'model2_points': model2.VH_number* 2,
        # NOTE(review): this uses the median of `diff`, whereas the JA summary
        # uses the mean (the quantity a paired t-test examines) — confirm the
        # difference is intentional.
        'mape_improvement': diff.median(),
        't_statistic': abs(t_stat),
        'p_value': p_value,
        'significant': p_value < SIGNIFICANCE_LEVEL
    })

# Convert to DataFrame
comparison_df = pd.DataFrame(comparison_results)

# Create visualization using a bar chart
plt.figure(figsize=(16*1.3/3, 9*1.3/3))

# One "a→b" tick label per comparison, read straight from the two point columns
x_labels = [
    f"{first}→{second}"
    for first, second in zip(comparison_df['model1_points'], comparison_df['model2_points'])
]

# Significant comparisons: translucent red with '//' hatching; others: plain light gray
colors = ['#ff000080' if sig else 'lightgray' for sig in comparison_df['significant']]
hatches = ['//' if sig else '' for sig in comparison_df['significant']]

# Plot bars
bars = plt.bar(x_labels, comparison_df['mape_improvement'], color=colors, hatch=hatches, edgecolor=colors)

# Add a horizontal line at y=0
plt.axhline(y=0, color='gray', linestyle='--')

print(comparison_df)

# Legend built from proxy rectangles. The non-significant class is p >= threshold,
# matching the strict `<` used for the 'significant' column.
plt.legend([plt.Rectangle((0,0),1,1, color='lightgray'), 
            plt.Rectangle((0,0),1,1, color='#ff000080', hatch='//')], 
           ['p ≥ .025', 'p < .025'])

plt.xlabel('Model Comparison (Data Points)')
plt.ylabel('Δ MAPE (%)')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()