# RI to Mole Fraction Predictor

## Overview
This tool determines the **mole fraction of cyclohexane** in a **cyclohexane:ethanol solution** based on its measured **refractive index (RI)**. 

The calibration was performed using a fitted curve from experimental RI measurements of cyclohexane-ethanol mixtures across a range of compositions.

## How It Works
- **Interpolation**: When an RI value falls within the calibration range, the tool uses linear interpolation to estimate the cyclohexane mole fraction
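- **Extrapolation**: When an RI value falls outside the calibration range, the tool performs linear extrapolation using the nearest end segment of the calibration data, marks the result as extrapolated, and issues a warning if the estimated mole fraction falls outside the physical range of 0 to 1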

## Important Notes
- ⚠️ **Interpolated values** (within the calibration range) are generally reliable for estimating cyclohexane composition
- ⚠️ **Extrapolated values** (outside the calibration range) have higher uncertainty and should be interpreted carefully
- All predictions are logged for reference and can be cleared between sessions
- This relationship is specific to cyclohexane-ethanol mixtures at the temperature and conditions of the calibration

## Data Source
Uses fitted RI data from: `data/RI-Fitting-Data.xlsx` (Sheet1)

In [None]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML

# Global log to track all predictions.
# Shared module-level list used by the widget callbacks: every successful
# prediction appends a dict with the keys 'ri_input', 'x_estimate', 'status'
# and 'warning'; the "Clear Log" callback empties it and the "Export Log"
# callback writes its entries to a text file.
prediction_log = []

def load_fitted_curve(excel_path="RI_testing.xlsx", sheet_name="Sheet3 (2)"):
    """
    Read the calibration table (mole fraction vs. fitted RI) from a workbook.

    The sheet must provide the columns "Mole Fraction" and "predicted Y".
    Rows with a missing value in either column are discarded before the
    values are converted to float arrays.

    Returns:
        (x_sorted, y_sorted): mole fractions and their predicted RI values,
        both reordered so that the predicted RI is ascending.

    Raises:
        ValueError: if one of the required columns is absent from the sheet.
    """
    mole_col = "Mole Fraction"
    ri_col = "predicted Y"

    table = pd.read_excel(excel_path, sheet_name=sheet_name, engine="openpyxl")

    # Fail early, naming the first missing column and what was found instead.
    for wanted in (mole_col, ri_col):
        if wanted in table.columns:
            continue
        raise ValueError(f"Column '{wanted}' not found in sheet '{sheet_name}'. "
                         f"Found columns: {list(table.columns)}")

    complete_rows = table.dropna(subset=[mole_col, ri_col])
    mole_fractions = complete_rows[mole_col].to_numpy(dtype=float)
    predicted_ri = complete_rows[ri_col].to_numpy(dtype=float)

    # One permutation, derived from RI, keeps each (x, y) pair together.
    order = np.argsort(predicted_ri)
    return mole_fractions[order], predicted_ri[order]

def invert_ri_to_x_with_extrapolation(y_target, x_sorted, y_sorted):
    """
    Estimate the mole fraction x whose fitted RI equals ``y_target``.

    Inside the anchor range the estimate is a linear interpolation between
    the two neighbouring anchors. If y_target is outside the anchors, the
    estimate is a linear extrapolation along the nearest terminal segment
    and this is flagged in the diagnostics.

    Args:
        y_target: refractive index to invert.
        x_sorted: 1-D sequence of mole fractions, paired element-wise with
            ``y_sorted``.
        y_sorted: 1-D sequence of RI anchors, ordered either ascending or
            descending.

    Returns:
        (x_est, info) where ``info`` is a dict with the keys
        ``status`` ("interpolated" or "extrapolated_linear"),
        ``segment`` (an index pair ``(i - 1, i)`` when interpolating, or
        "head"/"tail" when extrapolating),
        ``warning`` (a message when the extrapolated estimate lies outside
        [0, 1], otherwise None) and
        ``y_range`` (``(y_sorted[0], y_sorted[-1])``).

    Raises:
        ValueError: if the anchors are not two equally long 1-D sequences
            with at least two points, or if ``y_target`` is NaN.
    """
    x_sorted = np.asarray(x_sorted, dtype=float)
    y_sorted = np.asarray(y_sorted, dtype=float)
    if x_sorted.ndim != 1 or x_sorted.shape != y_sorted.shape:
        raise ValueError("x_sorted and y_sorted must be 1-D sequences of equal length")
    n_points = y_sorted.size
    if n_points < 2:
        # A segment needs two anchors; without this check the lookups below
        # fail with an uninformative IndexError.
        raise ValueError(
            f"At least two calibration points are required, got {n_points}")
    if np.isnan(y_target):
        # NaN compares False with everything and would otherwise be reported
        # as a silent "extrapolated" NaN estimate.
        raise ValueError("y_target must be a number, got NaN")

    y_min, y_max = y_sorted[0], y_sorted[-1]
    increasing = y_max >= y_min
    y_low, y_high = (y_min, y_max) if increasing else (y_max, y_min)
    y_range = (y_min, y_max)

    if y_low <= y_target <= y_high:
        if increasing:
            idx = np.searchsorted(y_sorted, y_target)
        else:
            # searchsorted is only defined for ascending data: search the
            # reversed anchors and map the position back to original indices.
            idx = n_points - np.searchsorted(y_sorted[::-1], y_target)
        # Keep idx-1 and idx both valid so the end anchors use a real segment.
        idx = int(np.clip(idx, 1, n_points - 1))
        y1, y2 = y_sorted[idx - 1], y_sorted[idx]
        x1, x2 = x_sorted[idx - 1], x_sorted[idx]
        # Duplicate RI anchors give a zero-width segment; fall back to its
        # first point instead of dividing by zero.
        t = 0.0 if y2 == y1 else (y_target - y1) / (y2 - y1)
        x_est = x1 + t * (x2 - x1)
        return x_est, {
            "status": "interpolated",
            "segment": (idx - 1, idx),
            "warning": None,
            "y_range": y_range,
        }

    # Outside the anchors: "head" is the segment at the start of the arrays,
    # "tail" the one at the end, whichever direction the anchors run.
    beyond_head = (y_target < y_min) if increasing else (y_target > y_min)
    if beyond_head:
        y1, y2 = y_sorted[0], y_sorted[1]
        x1, x2 = x_sorted[0], x_sorted[1]
        segment_name = "head"
    else:
        y1, y2 = y_sorted[-2], y_sorted[-1]
        x1, x2 = x_sorted[-2], x_sorted[-1]
        segment_name = "tail"

    slope = 0.0 if (y2 == y1) else (x2 - x1) / (y2 - y1)
    x_est = x1 + slope * (y_target - y1)

    # A mole fraction is only meaningful within [0, 1]; report, don't clamp.
    warning = None
    if x_est < 0.0:
        warning = f"Extrapolated mole fraction {x_est:.4f} is below physical minimum (0.0)"
    elif x_est > 1.0:
        warning = f"Extrapolated mole fraction {x_est:.4f} exceeds physical maximum (1.0)"

    return x_est, {
        "status": "extrapolated_linear",
        "segment": segment_name,
        "warning": warning,
        "y_range": y_range,
    }

def predict_mole_fraction_for_RI(y_target, excel_path="RI_testing.xlsx", sheet_name="Sheet3 (2)"):
    """
    Convenience wrapper: read the fitted curve, then invert RI -> mole fraction.

    The workbook is read again on every call. The result is the pair
    ``(x_est, info)`` produced by ``invert_ri_to_x_with_extrapolation``,
    i.e. the estimated mole fraction together with its diagnostics dict.
    """
    mole_fractions, fitted_ri = load_fitted_curve(excel_path, sheet_name)
    return invert_ri_to_x_with_extrapolation(y_target, mole_fractions, fitted_ri)

def create_interactive_predictor():
    """
    Create and display the interactive prediction interface.

    Builds an RI input field plus Predict / Export Log / Clear Log / Quit
    buttons, wires their callbacks, displays them together with an output
    area, and prints a start-up banner containing the calibration range
    (or the reason the calibration data could not be loaded).

    Side effects: appends to / empties the module-level ``prediction_log``
    and, on export, writes 'RI_predictions_log.txt' in the working directory.
    Returns nothing.
    """
    # Fixed calibration source (no user input needed). Defined once so the
    # range banner and the predictions always refer to the same workbook.
    excel_path = 'data/RI-Fitting-Data.xlsx'
    sheet_name = 'Sheet1'

    # Load data once to get calibration range info. A failure must not stop
    # the UI from appearing, but the reason is kept so the banner can show it.
    load_error = None
    ri_min = ri_max = x_min = x_max = None
    try:
        x_sorted, y_sorted = load_fitted_curve(excel_path, sheet_name)
        ri_min, ri_max = float(y_sorted.min()), float(y_sorted.max())
        x_min, x_max = float(x_sorted.min()), float(x_sorted.max())
        data_loaded = True
    except Exception as exc:
        load_error = exc
        data_loaded = False

    ri_input = widgets.FloatText(
        value=1.45, description='RI Value:', step=0.001,
        style={'description_width': '100px'}
    )

    submit_button = widgets.Button(description='Predict Cyclohexane Mole Fraction', button_style='info')
    clear_button = widgets.Button(description='Clear Log', button_style='warning')
    export_button = widgets.Button(description='Export Log', button_style='success')
    quit_button = widgets.Button(description='Quit', button_style='danger')
    output_area = widgets.Output()

    def on_submit_clicked(button):
        """Predict for the current RI value, log the result and print the log."""
        with output_area:
            try:
                # Read the widget once so the prediction, the log entry and
                # the printed value all refer to the same input.
                ri_value = ri_input.value
                x_est, info = predict_mole_fraction_for_RI(
                    ri_value, excel_path=excel_path,
                    sheet_name=sheet_name
                )
                prediction_log.append({
                    'ri_input': ri_value, 'x_estimate': x_est,
                    'status': info.get('status'), 'warning': info.get('warning')
                })
                print(f"\n{'='*60}")
                print(f"Input RI = {ri_value:.6f}")
                print(f"Predicted Cyclohexane Mole Fraction (x_C6H12) ≈ {x_est:.6f}")
                print(f"Status: {info.get('status')}")
                if info.get('warning'):
                    print(f"⚠️  WARNING: {info['warning']}")
                print(f"{'='*60}")
                print(f"\n📋 PREDICTION LOG ({len(prediction_log)} total):")
                print("-" * 60)
                for i, entry in enumerate(prediction_log, 1):
                    print(f"{i}. RI={entry['ri_input']:.6f} → x_C6H12={entry['x_estimate']:.6f} ({entry['status']})")
                    if entry['warning']:
                        print(f"   ⚠️  {entry['warning']}")
            except Exception as e:
                # UI boundary: show the problem in the output area instead of
                # letting the callback fail out of sight.
                print(f"❌ Error: {str(e)}")

    def on_clear_clicked(button):
        """Empty the prediction log and the output area."""
        with output_area:
            # Clear in place so every reference to the shared list stays valid.
            prediction_log.clear()
            clear_output(wait=True)
            print("📋 Log cleared. Ready for new predictions.")

    def on_export_clicked(button):
        """Write the prediction log to a text file in the working directory."""
        with output_area:
            if not prediction_log:
                print("❌ No predictions to export.")
                return
            try:
                filename = 'RI_predictions_log.txt'
                # The log lines contain a non-ASCII arrow; an explicit UTF-8
                # encoding keeps the export from failing on platforms whose
                # default text encoding cannot represent it.
                with open(filename, 'w', encoding='utf-8') as f:
                    f.write("RI to Mole Fraction Prediction Log\n")
                    f.write("="*60 + "\n\n")
                    for i, entry in enumerate(prediction_log, 1):
                        f.write(f"{i}. RI={entry['ri_input']:.6f} → x={entry['x_estimate']:.6f} ({entry['status']})\n")
                        if entry['warning']:
                            f.write(f"   WARNING: {entry['warning']}\n")
                print(f"✅ Log exported to '{filename}' ({len(prediction_log)} predictions)")
            except Exception as e:
                print(f"❌ Export failed: {str(e)}")

    def on_quit_clicked(button):
        """Leave interactive mode: disable every control and say goodbye."""
        # Without disabling, the buttons would keep working after "Quit".
        for control in (ri_input, submit_button, export_button,
                        clear_button, quit_button):
            control.disabled = True
        with output_area:
            clear_output(wait=True)
            print("👋 Exiting interactive mode.")

    submit_button.on_click(on_submit_clicked)
    clear_button.on_click(on_clear_clicked)
    export_button.on_click(on_export_clicked)
    quit_button.on_click(on_quit_clicked)

    input_box = widgets.VBox([
        widgets.HTML("<h4>Cyclohexane:Ethanol Solutions</h4>"),
        widgets.HTML("<p><b>Instructions:</b> Enter the refractive index (RI) of your cyclohexane-ethanol solution and click 'Predict Cyclohexane Mole Fraction' to estimate the cyclohexane composition. "
                     "All predictions are logged for tracking. Use 'Export Log' to save results, 'Clear Log' to reset, or 'Quit' to exit.</p>"),
        ri_input,
        widgets.HBox([submit_button, export_button, clear_button, quit_button])
    ])

    display(input_box)
    display(output_area)

    with output_area:
        # The button name quoted here must match submit_button's description.
        print("✅ Ready! Enter an RI value and click 'Predict Cyclohexane Mole Fraction' to begin.")
        if data_loaded:
            print(f"📊 Calibration Range: RI = [{ri_min:.6f}, {ri_max:.6f}] → x = [{x_min:.6f}, {x_max:.6f}]")
            print(f"   ℹ️  Values within this range use interpolation (more reliable)")
            print(f"   ⚠️  Values outside this range use extrapolation (higher uncertainty)")
        else:
            print(f"⚠️  Could not load calibration data from '{excel_path}' (sheet '{sheet_name}'): {load_error}")
        print("📋 All predictions will be logged below.")


# Build and display the predictor UI as soon as this code runs.
create_interactive_predictor()