In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
from pathlib import Path
from scipy.optimize import curve_fit
import math

# Surfactant library

In [2]:
# Reference table of the surfactants used in this notebook, keyed by abbreviation.
#
# Per-surfactant fields (same order in every entry):
#   full_name  - chemical name
#   CAS        - CAS registry number, kept as a string
#   CMC        - critical micelle concentration (presumably mM, like stock_conc - TODO confirm)
#   Category   - head-group class: "anionic", "cationic" or "zwitterionic"
#   MW         - molecular weight (presumably g/mol - TODO confirm)
#   stock_conc - stock solution concentration in mM
#   low, high  - bounds that bracket CMC in every entry (presumably a reported
#                CMC range in the same unit as CMC - TODO confirm)
#
# NOTE(review): "NaDC" commonly abbreviates sodium deoxycholate, whereas the name
# and CAS stored under that key are those of docusate sodium - confirm which
# compound was actually used.
_SURFACTANT_FIELDS = (
    "full_name", "CAS", "CMC", "Category", "MW", "stock_conc", "low", "high",
)

_SURFACTANT_ROWS = {
    # --- anionic ---
    "SDS":   ("Sodium Dodecyl Sulfate", "151-21-3", 8.5, "anionic", 289.39, 50, 7, 10),
    "NaDC":  ("Sodium Docusate", "577-11-7", 5.3375, "anionic", 445.57, 25, 2.48, 8.2),
    "NaC":   ("Sodium Cholate", "361-09-1", 14, "anionic", 431.56, 50, 13, 15),
    # --- cationic ---
    "CTAB":  ("Hexadecyltrimethylammonium Bromide", "57-09-0", 1.07, "cationic", 364.45, 5, 0.9, 1.24),
    "DTAB":  ("Dodecyltrimethylammonium Bromide", "1119-94-4", 15.85, "cationic", 308.34, 50, 15.7, 16),
    "TTAB":  ("Tetradecyltrimethylammonium Bromide", "1119-97-7", 3.985, "cationic", 336.39, 50, 3.77, 4.2),
    # --- zwitterionic ---
    "CAPB":  ("Cocamidopropyl Betaine", "61789-40-0", 0.627, "zwitterionic", 342.52, 50, 0.28, 0.974),
    "CHAPS": ("CHAPS", "75621-03-3", 8, "zwitterionic", 614.88, 30, 6, 10),
}

# Expand each row into the {field: value} mapping the rest of the notebook reads.
surfactant_library = {
    abbreviation: dict(zip(_SURFACTANT_FIELDS, values))
    for abbreviation, values in _SURFACTANT_ROWS.items()
}

# Canonical ordering of the surfactants: identical to the insertion order above.
SURFACTANT_ORDER = list(surfactant_library)

# Hex colour codes (presumably used for plotting - usage is not shown in this section).
red = '#e64b35'
blue = '#4dbbd5'

# Combine raw data

In [3]:
# Run folders to combine. Each one is searched for output_data_*.csv and
# wellplate_data_*.csv files whose names match `pattern` below.
base_dirs = [
    'mixed_CMC/20250708_175050',
    'mixed_CMC/20250709_161138',
    'mixed_CMC/20250710_115037',
    'mixed_CMC/20250710_173246',
    'mixed_CMC/20250711_105409',
    'mixed_CMC/20250711_151511',
    'mixed_CMC/20250714_172927',
    'mixed_CMC/20250715_113617',
    'mixed_CMC/20250715_164825',
    'mixed_CMC/20250716_113918',
    'mixed_CMC/20250716_154102',
    'mixed_CMC/20250806_164154_1a',
    'mixed_CMC/20250807_112908_1b',
    'mixed_CMC/20250811_104850_6',
    'mixed_CMC/20250811_205342_3',
    'mixed_CMC/20250812_133855_8',
]

# File names look like
#   <output_data|wellplate_data>_<surf1>-<ratio1>_<surf2>-<ratio2>_<fine_fine|rough_rough>.csv
pattern = re.compile(
    r'^(?P<prefix>output_data|wellplate_data)_'         # file type
    r'(?P<surfactant1>[^_-]+)-(?P<ratio1>\d+(?:\.\d+)?)_'  # first surfactant & ratio
    r'(?P<surfactant2>[^_-]+)-(?P<ratio2>\d+(?:\.\d+)?)_'  # second surfactant & ratio
    r'(?P<analysis>fine_fine|rough_rough)\.csv$'           # analysis type
)

# Columns that identify one mixture / analysis combination. Together with
# `well_idx` they form the merge key between measurements and concentrations.
GROUP_KEYS = ['surfactant_1', 'surfactant_1_ratio', 'surfactant_2', 'surfactant_2_ratio', 'analysis']

# Each wellplate file is repeated this many times so that it lines up
# row-for-row with the measurement file.
# NOTE(review): assumes every output_data file lists the full concentration
# series once per replicate, for exactly this many replicates - confirm.
N_REPLICATES = 3


def _tag_with_file_metadata(frame, match):
    """Add the identifiers parsed from a file name to `frame` as constant columns.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table read from the file; modified in place.
    match : re.Match
        Successful match of `pattern` against the file name.

    Returns
    -------
    pandas.DataFrame
        The same `frame`, with the five GROUP_KEYS columns appended. The ratio
        columns hold the strings captured from the file name (not floats).
    """
    frame['surfactant_1'] = match.group('surfactant1')
    frame['surfactant_1_ratio'] = match.group('ratio1')
    frame['surfactant_2'] = match.group('surfactant2')
    frame['surfactant_2_ratio'] = match.group('ratio2')
    frame['analysis'] = match.group('analysis')
    return frame


# 1) Read & combine output_data
out_dfs = []
for base in base_dirs:
    print(base)  # progress: one line per run folder
    for path in Path(base).glob('output_data_*.csv'):
        m = pattern.match(path.name)
        if not m:
            # File name does not follow the expected convention; skip it.
            continue

        df = pd.read_csv(path, usecols=['replicate', '334_373', '334_384', '600'])
        # Ratio of the two '334_*' signal columns.
        df['ratio'] = df['334_373'] / df['334_384']
        out_dfs.append(_tag_with_file_metadata(df, m))

if not out_dfs:
    # pd.concat would raise an opaque "No objects to concatenate" here.
    raise ValueError("No output_data_*.csv files matching the expected name pattern were found in base_dirs")

combined_raw = pd.concat(out_dfs, ignore_index=True)

# Running row number within each group. This position is the only link between
# a measurement row and its concentration, so both tables must list their rows
# in the same order.
combined_raw['well_idx'] = combined_raw.groupby(GROUP_KEYS).cumcount()

# 2) Read & combine wellplate_data (no averaging!)
well_dfs = []
for base in base_dirs:
    for path in Path(base).glob('wellplate_data_*.csv'):
        m = pattern.match(path.name)
        if not m:
            continue

        concentrations = pd.read_csv(path, usecols=['concentration'])
        # Repeat the concentration series once per replicate.
        wdf = pd.concat([concentrations] * N_REPLICATES, ignore_index=True)
        well_dfs.append(_tag_with_file_metadata(wdf, m))

if not well_dfs:
    raise ValueError("No wellplate_data_*.csv files matching the expected name pattern were found in base_dirs")

well_meta = pd.concat(well_dfs, ignore_index=True)

# Index each well-plate row within its group, mirroring combined_raw.
well_meta['well_idx'] = well_meta.groupby(GROUP_KEYS).cumcount()

# Sanity check: the merge below is positional, so a group whose two tables have
# different row counts ends up with missing or shifted concentrations. Report
# such groups instead of letting the left merge hide them.
raw_sizes = combined_raw.groupby(GROUP_KEYS).size()
well_sizes = well_meta.groupby(GROUP_KEYS).size().reindex(raw_sizes.index, fill_value=0)
size_mismatch = raw_sizes != well_sizes
if size_mismatch.any():
    print(f"WARNING: {int(size_mismatch.sum())} group(s) have different row counts in "
          "output_data and wellplate_data; their concentrations may be missing or misaligned:")
    print(
        pd.DataFrame({'output_rows': raw_sizes, 'wellplate_rows': well_sizes})
        .loc[size_mismatch]
        .to_string()
    )

# 3) Merge on the five group identifiers (both surfactants, both ratios,
#    analysis type) plus well_idx; every measurement row is kept (left join).
combined_full = pd.merge(
    combined_raw,
    well_meta,
    on=GROUP_KEYS + ['well_idx'],
    how='left'
)

mixed_CMC/20250708_175050
mixed_CMC/20250709_161138
mixed_CMC/20250710_115037
mixed_CMC/20250710_173246
mixed_CMC/20250711_105409
mixed_CMC/20250711_151511
mixed_CMC/20250714_172927
mixed_CMC/20250715_113617
mixed_CMC/20250715_164825
mixed_CMC/20250716_113918
mixed_CMC/20250716_154102
mixed_CMC/20250806_164154_1a
mixed_CMC/20250807_112908_1b
mixed_CMC/20250811_104850_6
mixed_CMC/20250811_205342_3
mixed_CMC/20250812_133855_8


In [4]:
# Relabel two ratio values in both ratio columns. Keys and values are strings,
# so only cells holding exactly '0.9' or '0.1' are changed; everything else is
# left untouched, and re-running the cell is a no-op.
# NOTE(review): presumably the '0.9' / '0.1' labels stand for mixtures that are
# really 0.95 / 0.05 - confirm.
ratio_relabel = {'0.9': '0.95', '0.1': '0.05'}
combined_full['surfactant_1_ratio'] = combined_full['surfactant_1_ratio'].replace(ratio_relabel)
combined_full['surfactant_2_ratio'] = combined_full['surfactant_2_ratio'].replace(ratio_relabel)


In [5]:
# Display the merged table (measurements plus concentration) for a visual check.
combined_full

Unnamed: 0,replicate,334_373,334_384,600,ratio,surfactant_1,surfactant_1_ratio,surfactant_2,surfactant_2_ratio,analysis,well_idx,concentration
0,1,157125.0,184034.0,0.0411,0.853782,SDS,0.5,NaC,0.5,rough_rough,0,1.496
1,1,155280.0,185929.0,0.0395,0.835158,SDS,0.5,NaC,0.5,rough_rough,1,2.044
2,1,162266.0,200560.0,0.0396,0.809065,SDS,0.5,NaC,0.5,rough_rough,2,2.793
3,1,158642.0,192976.0,0.0398,0.822082,SDS,0.5,NaC,0.5,rough_rough,3,3.816
4,1,175794.0,221639.0,0.0412,0.793155,SDS,0.5,NaC,0.5,rough_rough,4,5.214
...,...,...,...,...,...,...,...,...,...,...,...,...
4603,3,78533.0,106255.0,0.0392,0.739099,TTAB,0.95,CAPB,0.05,rough_rough,31,5.360
4604,3,73970.0,101745.0,0.0395,0.727014,TTAB,0.95,CAPB,0.05,rough_rough,32,7.650
4605,3,69795.0,97601.0,0.0390,0.715105,TTAB,0.95,CAPB,0.05,rough_rough,33,10.917
4606,3,69465.0,95048.0,0.0394,0.730841,TTAB,0.95,CAPB,0.05,rough_rough,34,15.579


In [6]:
# Write the merged table to disk without the DataFrame index.
# to_csv does not create missing folders, so make sure the target directory
# exists first; otherwise a fresh checkout fails here with an OSError.
output_csv = Path("processed_data/mixed_CMC_data.csv")
output_csv.parent.mkdir(parents=True, exist_ok=True)
combined_full.to_csv(output_csv, index=False)

# Show the table that was just written.
combined_full

Unnamed: 0,replicate,334_373,334_384,600,ratio,surfactant_1,surfactant_1_ratio,surfactant_2,surfactant_2_ratio,analysis,well_idx,concentration
0,1,157125.0,184034.0,0.0411,0.853782,SDS,0.5,NaC,0.5,rough_rough,0,1.496
1,1,155280.0,185929.0,0.0395,0.835158,SDS,0.5,NaC,0.5,rough_rough,1,2.044
2,1,162266.0,200560.0,0.0396,0.809065,SDS,0.5,NaC,0.5,rough_rough,2,2.793
3,1,158642.0,192976.0,0.0398,0.822082,SDS,0.5,NaC,0.5,rough_rough,3,3.816
4,1,175794.0,221639.0,0.0412,0.793155,SDS,0.5,NaC,0.5,rough_rough,4,5.214
...,...,...,...,...,...,...,...,...,...,...,...,...
4603,3,78533.0,106255.0,0.0392,0.739099,TTAB,0.95,CAPB,0.05,rough_rough,31,5.360
4604,3,73970.0,101745.0,0.0395,0.727014,TTAB,0.95,CAPB,0.05,rough_rough,32,7.650
4605,3,69795.0,97601.0,0.0390,0.715105,TTAB,0.95,CAPB,0.05,rough_rough,33,10.917
4606,3,69465.0,95048.0,0.0394,0.730841,TTAB,0.95,CAPB,0.05,rough_rough,34,15.579
